diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index a21f589..818d423 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -347,6 +347,7 @@ public class HiveConf extends Configuration {
     // Optimizer
     HIVEOPTCP("hive.optimize.cp", true), // column pruner
+    HIVEOPTINDEXFILTER("hive.optimize.index.filter", false), // automatically use indexes
     HIVEOPTPPD("hive.optimize.ppd", true), // predicate pushdown
     // push predicates down to storage handlers
     HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true),
@@ -355,6 +356,10 @@ public class HiveConf extends Configuration {
     HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join
     HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),

+    // Indexes
+    HIVEOPTINDEXFILTER_COMPACT_MINSIZE("hive.optimize.index.filter.compact.minSize", (long) 5 * 1024 * 1024 * 1024), // 5G
+    HIVEOPTINDEXFILTER_COMPACT_MAXSIZE("hive.optimize.index.filter.compact.maxSize", (long) -1), // infinity
+
     // Statistics
     HIVESTATSAUTOGATHER("hive.stats.autogather", true),
     HIVESTATSDBCLASS("hive.stats.dbclass",
diff --git conf/hive-default.xml conf/hive-default.xml
index c42197f..35aab5a 100644
--- conf/hive-default.xml
+++ conf/hive-default.xml
@@ -325,6 +325,12 @@
+<property>
+  <name>hive.optimize.index.filter</name>
+  <value>false</value>
+  <description>Whether to enable automatic use of indexes</description>
+</property>
+
 <property>
   <name>hive.optimize.ppd</name>
   <value>true</value>
   <description>Whether to enable predicate pushdown</description>
 </property>
@@ -984,6 +990,19 @@
+<property>
+  <name>hive.optimize.index.filter.compact.minSize</name>
+  <value>5368709120</value>
+  <description>Minimum size (in bytes) of the inputs on which a compact index is automatically used.</description>
+</property>
+
+<property>
+  <name>hive.optimize.index.filter.compact.maxSize</name>
+  <value>-1</value>
+  <description>Maximum size (in bytes) of the inputs on which a compact index is automatically used.
+  A negative number is equivalent to infinity.</description>
+</property>
+
 <property>
   <name>hive.exim.uri.scheme.whitelist</name>
   <value>hdfs,pfile</value>
   <description>A comma separated list of acceptable URI schemes for import and export.</description>
 </property>
diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 14015d0..0fde98d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -313,6 +313,10 @@ public class Driver implements CommandProcessor {
    * The SQL query to compile.
    */
   public int compile(String command) {
+    return compile(command, true);
+  }
+
+  public int compile(String command, boolean resetTaskIds) {
     Utilities.PerfLogBegin(LOG, "compile");
@@ -321,7 +325,9 @@ public class Driver implements CommandProcessor {
       plan = null;
     }

+    if (resetTaskIds) {
       TaskFactory.resetId();
+    }

     try {
       command = new VariableSubstitution().substitute(conf,command);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
index 6437385..c5c8ddc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
@@ -273,9 +273,13 @@ public class ExecDriver extends Task implements Serializable, Hadoop
     job.setNumReduceTasks(work.getNumReduceTasks().intValue());
     job.setReducerClass(ExecReducer.class);

+    // Set hive input format, and input format file if necessary.
     if (work.getInputformat() != null) {
       HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat());
     }
+    if (work.getIndexIntermediateFile() != null) {
+      job.set("hive.index.compact.file", work.getIndexIntermediateFile());
+    }

     // Turn on speculative execution for reducers
     boolean useSpeculativeExecReducers = HiveConf.getBoolVar(job,
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
index c02d90b..0fd63f5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
@@ -22,6 +22,7 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.List;

+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
@@ -53,6 +54,8 @@ public class TableScanOperator extends Operator implements
   private transient String partitionSpecs;
   private transient boolean inputFileChanged = false;
   private TableDesc tableDesc;
+  private String indexInputFormat;
+  private String indexIntermediateFile;

   public TableDesc getTableDesc() {
@@ -63,6 +66,26 @@ public class TableScanOperator extends Operator implements
     this.tableDesc = tableDesc;
   }

+  public String getIndexInputFormat() {
+    return indexInputFormat;
+  }
+
+  public void setIndexInputFormat(String indexInputFormat) {
+    this.indexInputFormat = indexInputFormat;
+  }
+
+  public String getIndexIntermediateFile() {
+    return indexIntermediateFile;
+  }
+
+  public void setIndexIntermediateFile(String fileName) {
+    this.indexIntermediateFile = fileName;
+  }
+
+  public boolean usesIndex() {
+    return StringUtils.isNotEmpty(indexInputFormat) && StringUtils.isNotEmpty(indexIntermediateFile);
+  }
+
   /**
    * Other than gathering statistics for the ANALYZE command, the table scan operator
    * does not do anything special other than just forwarding the row. Since the table
diff --git ql/src/java/org/apache/hadoop/hive/ql/index/AbstractIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/AbstractIndexHandler.java
index dd0186d..9050fb8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/index/AbstractIndexHandler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/index/AbstractIndexHandler.java
@@ -20,8 +20,12 @@ package org.apache.hadoop.hive.ql.index;

 import java.util.List;

+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

 /**
  * Abstract base class for index handlers.  This is provided as insulation
@@ -42,4 +46,13 @@ public abstract class AbstractIndexHandler implements HiveIndexHandler {
     return sb.toString();
   }

+  public void generateIndexQuery(Index index, ExprNodeDesc predicate,
+      ParseContext pctx, HiveIndexQueryContext queryContext) {
+    queryContext.setQueryTasks(null);
+    return;
+  }
+
+  public boolean checkQuerySize(long inputSize, HiveConf conf) {
+    return false;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java
index 411b78f..4f259c9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java
@@ -22,12 +22,15 @@ import java.util.List;
 import java.util.Set;

 import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

 /**
  * HiveIndexHandler defines a pluggable interface for adding new index handlers
@@ -114,4 +117,23 @@ public interface HiveIndexHandler extends Configurable {
       Set<ReadEntity> inputs, Set<WriteEntity> outputs)
       throws HiveException;

+  /**
+   * Generate the list of tasks required to run an index sub-query for the
+   * given predicate, using the given index.
+   * @param index
+   * @param predicate
+   * @param pctx
+   * @param queryContext contains results, such as query tasks and input configuration
+   */
+  void generateIndexQuery(Index index, ExprNodeDesc predicate,
+      ParseContext pctx, HiveIndexQueryContext queryContext);
+
+  /**
+   * Check the size of an input query to make sure it fits within the bounds for automatic index use.
+   *
+   * @param inputSize size (in bytes) of the query in question
+   * @param conf
+   * @return true if the query is within the bounds
+   */
+  boolean checkQuerySize(long inputSize, HiveConf conf);
 }
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java
new file mode 100644
index 0000000..bb7d8a3
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.index; + +import java.util.HashSet; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; + +/** + * Used to pass information during query processing + * + */ +public class HiveIndexQueryContext { + + private HashSet additionalSemanticInputs; + private String indexInputFormat; + private String indexIntermediateFile; + private List> queryTasks; + private ExprNodeDesc residualPredicate; + + public HiveIndexQueryContext() { + this.additionalSemanticInputs = null; + this.indexInputFormat = null; + this.indexIntermediateFile = null; + this.queryTasks = null; + } + + public HashSet getAdditionalSemanticInputs() { + return additionalSemanticInputs; + } + public void addAdditionalSemanticInputs(HashSet additionalParseInputs) { + if (this.additionalSemanticInputs == null) { + this.additionalSemanticInputs = new HashSet(); + } + this.additionalSemanticInputs.addAll(additionalParseInputs); + } + + public String getIndexInputFormat() { + return indexInputFormat; + } + public void setIndexInputFormat(String indexInputFormat) { + this.indexInputFormat = indexInputFormat; + } + + public String getIndexIntermediateFile() { + return indexIntermediateFile; + } + public void setIndexIntermediateFile(String indexIntermediateFile) { + this.indexIntermediateFile = indexIntermediateFile; + } + + public List> getQueryTasks() { + return queryTasks; + } + public void setQueryTasks(List> indexQueryTasks) { + this.queryTasks = indexQueryTasks; + } + + public void setResidualPredicate(ExprNodeDesc residualPredicate) { + this.residualPredicate = residualPredicate; + } + + public ExprNodeDesc getResidualPredicate() { + return residualPredicate; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java index 1f01446..19c45ff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java @@ -19,16 +19,16 @@ package org.apache.hadoop.hive.ql.index.compact; import java.util.ArrayList; -import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Set; import java.util.Map.Entry; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; @@ -39,18 +39,30 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.index.AbstractIndexHandler; -import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.index.HiveIndexQueryContext; +import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer; +import org.apache.hadoop.hive.ql.index.IndexSearchCondition; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import 
org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate; +import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; public class CompactIndexHandler extends AbstractIndexHandler { private Configuration configuration; + private static final Log LOG = LogFactory.getLog(CompactIndexHandler.class.getName()); + @Override public void analyzeIndexDefinition(Table baseTable, Index index, @@ -201,6 +213,105 @@ public class CompactIndexHandler extends AbstractIndexHandler { } @Override + public void generateIndexQuery(Index index, ExprNodeDesc predicate, + ParseContext pctx, HiveIndexQueryContext queryContext) { + + DecomposedPredicate decomposedPredicate = decomposePredicate(predicate, index); + + if (decomposedPredicate == null) { + queryContext.setQueryTasks(null); + return; // abort if we couldn't pull out anything from the predicate + } + + // pass residual predicate back out for further processing + queryContext.setResidualPredicate(decomposedPredicate.residualPredicate); + + // Build reentrant QL for index query + StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY "); + + String tmpFile = pctx.getContext().getMRTmpFileURI(); + qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name + qlCommand.append("SELECT `_bucketname` , `_offsets` FROM "); + qlCommand.append(HiveUtils.unparseIdentifier(index.getIndexTableName())); + qlCommand.append(" WHERE "); + + String predicateString = decomposedPredicate.pushedPredicate.getExprString(); + qlCommand.append(predicateString); + + // generate tasks from index query string + LOG.info("Generating tasks for re-entrant QL query: " + qlCommand.toString()); + Driver driver = new Driver(pctx.getConf()); + driver.compile(qlCommand.toString(), false); + + // setup TableScanOperator to change input format for original query + queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName()); + queryContext.setIndexIntermediateFile(tmpFile); + + queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs()); + queryContext.setQueryTasks(driver.getPlan().getRootTasks()); + return; + } + + /** + * Split the predicate into the piece we can deal with (pushed), and the one we can't (residual) + * @param predicate + * @param index + * @return + */ + private DecomposedPredicate decomposePredicate(ExprNodeDesc predicate, Index index) { + IndexPredicateAnalyzer analyzer = getIndexPredicateAnalyzer(index); + List searchConditions = new ArrayList(); + // split predicate into pushed (what we can handle), and residual (what we can't handle) + ExprNodeDesc residualPredicate = analyzer.analyzePredicate(predicate, searchConditions); + + if (searchConditions.size() == 0) { + return null; + } + + DecomposedPredicate decomposedPredicate = new DecomposedPredicate(); + decomposedPredicate.pushedPredicate = analyzer.translateSearchConditions(searchConditions); + decomposedPredicate.residualPredicate = residualPredicate; + + return 
decomposedPredicate; + } + + /** + * Instantiate a new predicate analyzer suitable for determining + * whether we can use an index, based on rules for indexes in + * WHERE clauses that we support + * + * @return preconfigured predicate analyzer for WHERE queries + */ + private IndexPredicateAnalyzer getIndexPredicateAnalyzer(Index index) { + IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer(); + + analyzer.addComparisonOp(GenericUDFOPEqual.class.getName()); + analyzer.addComparisonOp(GenericUDFOPLessThan.class.getName()); + analyzer.addComparisonOp(GenericUDFOPEqualOrLessThan.class.getName()); + analyzer.addComparisonOp(GenericUDFOPGreaterThan.class.getName()); + analyzer.addComparisonOp(GenericUDFOPEqualOrGreaterThan.class.getName()); + + // only return results for columns in this index + List columnSchemas = index.getSd().getCols(); + for (FieldSchema column : columnSchemas) { + analyzer.allowColumnName(column.getName()); + } + + return analyzer; + } + + + @Override + public boolean checkQuerySize(long querySize, HiveConf hiveConf) { + long minSize = hiveConf.getLongVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER_COMPACT_MINSIZE); + long maxSize = hiveConf.getLongVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER_COMPACT_MAXSIZE); + if (maxSize < 0) { + maxSize = Long.MAX_VALUE; + } + return (querySize > minSize & querySize < maxSize); + } + + @Override public boolean usesIndexTable() { return true; } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 50db44c..223adcc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.ProtectMode; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.SerDeInfo; @@ -813,4 +814,13 @@ public class Table implements Serializable { public String getCompleteName() { return getDbName() + "@" + getTableName(); } + + /** + * @return List containing Indexes names if there are indexes on this table + * @throws HiveException + **/ + public List getAllIndexes(short max) throws HiveException { + Hive hive = Hive.get(); + return hive.getIndexes(getTTable().getDbName(), getTTable().getTableName(), max); + } }; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java index 6162676..63f0f3e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java @@ -33,11 +33,11 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QBParseInfo; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec; import 
org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.StatsWork; /** @@ -69,6 +69,16 @@ public class GenMRTableScan1 implements NodeProcessor { ctx.setCurrTask(currTask); ctx.setCurrTopOp(currTopOp); + // Reset the inputFormat and inputFormatFile if the table scan needs a different one. + if (op.usesIndex()) { + String indexInputFormat = op.getIndexInputFormat(); + String indexIntermediateFile = op.getIndexIntermediateFile(); + MapredWork currentWork = (MapredWork) currTask.getWork(); + currentWork.setInputformat(indexInputFormat); + currentWork.setIndexIntermediateFile(indexIntermediateFile); + } + + for (String alias : parseCtx.getTopOps().keySet()) { Operator currOp = parseCtx.getTopOps().get(alias); if (currOp == op) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/IndexWhereResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/IndexWhereResolver.java new file mode 100644 index 0000000..d67bd90 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/IndexWhereResolver.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.physical; + +import java.util.ArrayList; + +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.optimizer.physical.index.IndexWhereTaskDispatcher; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +public class IndexWhereResolver implements PhysicalPlanResolver { + + @Override + public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticException { + //Dispatcher dispatcher = getDispatcher(physicalContext); + Dispatcher dispatcher = new IndexWhereTaskDispatcher(physicalContext); + GraphWalker opGraphWalker = new DefaultGraphWalker(dispatcher); + ArrayList topNodes = new ArrayList(); + topNodes.addAll(physicalContext.rootTasks); + opGraphWalker.startWalking(topNodes, null); + + return physicalContext; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java index 0ae9fa2..ba0cfeb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java @@ -52,6 +52,9 @@ public class PhysicalOptimizer { if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) { resolvers.add(new CommonJoinResolver()); } + if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER)) { + resolvers.add(new IndexWhereResolver()); + } resolvers.add(new MapJoinResolver()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcCtx.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcCtx.java new file mode 100644 index 0000000..608fa69 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcCtx.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.physical.index; + +import java.io.Serializable; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.parse.ParseContext; + +public class IndexWhereProcCtx implements NodeProcessorCtx { + + private static final Log LOG = LogFactory.getLog(IndexWhereProcCtx.class.getName()); + + private final Task currentTask; + private final ParseContext parseCtx; + + public IndexWhereProcCtx(Task task, ParseContext parseCtx) { + this.currentTask = task; + this.parseCtx = parseCtx; + } + + public ParseContext getParseContext() { + return parseCtx; + } + + public Task getCurrentTask() { + return currentTask; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java new file mode 100644 index 0000000..fb3875f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java @@ -0,0 +1,246 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.physical.index; + +import java.io.IOException; +import java.io.Serializable; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Stack; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.MapRedTask; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.index.HiveIndexHandler; +import org.apache.hadoop.hive.ql.index.HiveIndexQueryContext; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveUtils; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc; + +/** +* +* IndexWhereProcessor. +* Processes Operator Nodes to look for WHERE queries with a predicate column +* on which we have an index. Creates an index subquery Task for these +* WHERE queries to use the index automatically. +*/ +public class IndexWhereProcessor implements NodeProcessor { + + private static final Log LOG = LogFactory.getLog(IndexWhereProcessor.class.getName()); + private final Map> indexes; + + public IndexWhereProcessor(Map> indexes) { + super(); + this.indexes = indexes; + } + + @Override + /** + * Process a node of the operator tree. This matches on the rule in IndexWhereTaskDispatcher + */ + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... 
nodeOutputs) throws SemanticException { + + FilterOperator operator = (FilterOperator) nd; + FilterDesc operatorDesc = operator.getConf(); + ExprNodeDesc predicate = operatorDesc.getPredicate(); + + IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx; + ParseContext pctx = context.getParseContext(); + + // check if we have indexes on all partitions in this table scan + try { + if (!arePartitionsCoveredByIndex(operator, pctx)) { + //return null; + } + } catch (HiveException e) { + LOG.error("Fatal Error: problem accessing metastore"); + throw new SemanticException(e); + } + + // get potential reentrant index queries from each index + Map queryContexts = new HashMap(); + for (List indexesOnTable : indexes.values()) { + for (Index index : indexesOnTable) { + HiveIndexQueryContext queryContext = new HiveIndexQueryContext(); + rewriteForIndex(predicate, index, pctx, context.getCurrentTask(), queryContext); + List> indexTasks = queryContext.getQueryTasks(); + + if (indexTasks != null && indexTasks.size() > 0) + { + queryContexts.put(index, queryContext); + } + } + } + + // choose an index rewrite to use + if (queryContexts.size() > 0) { + // TODO This would be a good place for some sort of cost based choice? + Index chosenIndex = queryContexts.keySet().iterator().next(); + + // modify the parse context to use indexing + // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times + HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex); + + // prepare the operator to use indexing + TableScanOperator originalTblScan = (TableScanOperator) pctx.getTopOps().get(chosenIndex.getOrigTableName()); + + originalTblScan.setIndexInputFormat(queryContext.getIndexInputFormat()); + originalTblScan.setIndexIntermediateFile(queryContext.getIndexIntermediateFile()); + + // modify inputs based on index query + Set inputs = pctx.getSemanticInputs(); + inputs.addAll(queryContext.getAdditionalSemanticInputs()); + + List> chosenRewrite = queryContext.getQueryTasks(); + + // add dependencies so index query runs first + insertIndexQuery(pctx, context, chosenRewrite); + } + + return null; + } + + /** + * Get a list of Tasks to activate use of indexes. 
+ * Generate the tasks for the index query (where we store results of + * querying the index in a tmp file) inside the IndexHandler + * @param task + */ + private void rewriteForIndex(ExprNodeDesc predicate, Index index, + ParseContext pctx, Task task, + HiveIndexQueryContext queryContext) + throws SemanticException { + HiveIndexHandler indexHandler; + try { + indexHandler = HiveUtils.getIndexHandler(pctx.getConf(), index.getIndexHandlerClass()); + } catch (HiveException e) { + LOG.error("Exception while loading IndexHandler: " + index.getIndexHandlerClass()); + throw new SemanticException("Failed to load indexHandler: " + index.getIndexHandlerClass(), e); + } + + // check the size + try { + ContentSummary inputSummary = Utilities.getInputSummary(pctx.getContext(), ((MapRedTask) task).getWork(), null); + long inputSize = inputSummary.getLength(); + if (!indexHandler.checkQuerySize(inputSize, pctx.getConf())) { + queryContext.setQueryTasks(null); + return; + } + } catch (IOException e) { + throw new SemanticException("Failed to get task size", e); + } + + // use the IndexHandler to generate the index query + indexHandler.generateIndexQuery(index, predicate, pctx, queryContext); + // TODO HIVE-2115 use queryContext.residualPredicate to process residual predicate + + return; + } + + /** + * Check the partitions used by the table scan + * @param pctx + * @param operator + * @return true if all partitions being accessed are present in the index table + */ + private boolean arePartitionsCoveredByIndex(FilterOperator operator, ParseContext pctx) throws HiveException { + TableScanOperator tableScan = (TableScanOperator) operator.getParentOperators().get(0); + Hive hive = Hive.get(pctx.getConf()); + + // make sure each partition exists on the index table + PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan); + Set queryPartitions = queryPartitionList.getConfirmedPartns(); + + for (Partition part : queryPartitions) { + Table partitionedTable = part.getTable(); + if (! 
indexes.containsKey(partitionedTable)) { + return false; // something is wrong if the partition's table is not indexed + } + // every partition's table should have the same partition keys as the index table + Set indexPartitions = new HashSet(); + for (Index index : indexes.get(partitionedTable)) { + Table indexTable = hive.getTable(index.getIndexTableName()); + indexPartitions.addAll(hive.getPartitions(indexTable)); + } + + // there are no matching partitions on any index + if (!indexPartitions.contains(part)) { + return false; + } + } + + return true; + } + + /** + * Insert the rewrite tasks at the head of the pctx task tree + * @param pctx + * @param context + * @param chosenRewrite + */ + private void insertIndexQuery(ParseContext pctx, IndexWhereProcCtx context, List> chosenRewrite) { + Task wholeTableScan = context.getCurrentTask(); + LinkedHashSet> rewriteLeaves = new LinkedHashSet>(); + findLeaves(chosenRewrite, rewriteLeaves); + + for (Task leaf : rewriteLeaves) { + leaf.addDependentTask(wholeTableScan); // add full scan task as child for every index query task + } + + // replace the original with the index sub-query as a root task + pctx.replaceRootTask(wholeTableScan, chosenRewrite); + } + + /** + * Find the leaves of the task tree + */ + private void findLeaves(List> tasks, Set> leaves) { + for (Task t : tasks) { + if (t.getDependentTasks() == null) { + leaves.add(t); + } else { + findLeaves(t.getDependentTasks(), leaves); + } + } + } + +} + diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java new file mode 100644 index 0000000..0d3728c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java @@ -0,0 +1,165 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.physical.index; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +/** + * + * IndexWhereTaskDispatcher. Walks a Task tree, and for the right kind of Task, + * walks the operator tree to create an index subquery. Then attaches the + * subquery task to the task tree. + * + */ +public class IndexWhereTaskDispatcher implements Dispatcher { + + private final PhysicalContext physicalContext; + + public IndexWhereTaskDispatcher(PhysicalContext context) { + super(); + physicalContext = context; + } + + @Override + public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) + throws SemanticException { + + Task task = (Task) nd; + + ParseContext pctx = physicalContext.getParseContext(); + + // create the regex's so the walker can recognize our WHERE queries + Map operatorRules = createOperatorRules(pctx); + + // check for no indexes on any table + if (operatorRules == null) { + return null; + } + + // create context so the walker can carry the current task with it. + IndexWhereProcCtx indexWhereOptimizeCtx = new IndexWhereProcCtx(task, pctx); + + // create the dispatcher, which fires the processor according to the rule that + // best matches + Dispatcher dispatcher = new DefaultRuleDispatcher(getDefaultProcessor(), + operatorRules, + indexWhereOptimizeCtx); + + // walk the mapper operator(not task) tree + GraphWalker ogw = new DefaultGraphWalker(dispatcher); + ArrayList topNodes = new ArrayList(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + + return null; + } + + /** + * Create a set of rules that only matches WHERE predicates on columns we have + * an index on. 
+ * @return + */ + private Map createOperatorRules(ParseContext pctx) throws SemanticException { + Map operatorRules = new LinkedHashMap(); + + List supportedIndexes = new ArrayList(); + supportedIndexes.add(CompactIndexHandler.class.getName()); + + // query the metastore to know what columns we have indexed + Collection topTables = pctx.getTopToTable().values(); + Map> indexes = new HashMap>(); + for (Table tbl : topTables) + { + List tblIndexes = getIndexes(tbl, supportedIndexes); + if (tblIndexes.size() > 0) { + indexes.put(tbl, tblIndexes); + } + } + + // quit if our tables don't have any indexes + if (indexes.size() == 0) { + return null; + } + + // FIL% is a filter operator, a WHERE shows up as a filter on a table scan operator (TS%) + operatorRules.put(new RuleRegExp("RULEWhere", "TS%FIL%"), new IndexWhereProcessor(indexes)); + + return operatorRules; + } + + /** + * Get a list of indexes on a table that match given types. + * Copied from HIVE-1694 patch + */ + private List getIndexes(Table baseTableMetaData, List matchIndexTypes) + throws SemanticException { + List matchingIndexes = new ArrayList(); + List indexesOnTable = null; + + try { + indexesOnTable = baseTableMetaData.getAllIndexes((short) -1); // get all indexes + } catch (HiveException e) { + throw new SemanticException("Error accessing metastore", e); + } + + for (Index index : indexesOnTable) { + String indexType = index.getIndexHandlerClass(); + if (matchIndexTypes.contains(indexType)) { + matchingIndexes.add(index); + } + } + return matchingIndexes; + } + + private NodeProcessor getDefaultProcessor() { + return new NodeProcessor() { + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + return null; + } + }; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index 937a7b3..4a591c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.parse; import java.io.Serializable; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -33,7 +34,9 @@ import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.hooks.LineageInfo; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -91,6 +94,9 @@ public class ParseContext { // a map-reduce job private boolean hasNonPartCols; + private HashSet semanticInputs; + private List> rootTasks; + public ParseContext() { } @@ -126,6 +132,8 @@ public class ParseContext { * list of map join operators with no reducer * @param opToSamplePruner * operator to sample pruner map + * @param semanticInputs + * @param rootTasks */ public ParseContext( HiveConf conf, @@ -143,7 +151,8 @@ public class ParseContext { UnionProcContext uCtx, List> listMapJoinOpsNoReducer, Map> groupOpToInputTables, Map prunedPartitions, - HashMap opToSamplePruner) { + HashMap opToSamplePruner, + HashSet semanticInputs, List> rootTasks) { this.conf 
= conf; this.qb = qb; this.ast = ast; @@ -166,6 +175,8 @@ public class ParseContext { this.groupOpToInputTables = groupOpToInputTables; this.prunedPartitions = prunedPartitions; this.opToSamplePruner = opToSamplePruner; + this.semanticInputs = semanticInputs; + this.rootTasks = rootTasks; } /** @@ -486,4 +497,14 @@ public class ParseContext { public void setMapJoinContext(Map mapJoinContext) { this.mapJoinContext = mapJoinContext; } + + public HashSet getSemanticInputs() { + return semanticInputs; + } + + public void replaceRootTask(Task rootTask, + List> tasks) { + this.rootTasks.remove(rootTask); + this.rootTasks.addAll(tasks); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index f0aca84..fac40a7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -262,7 +262,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { topSelOps, opParseCtx, joinContext, topToTable, loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, - opToSamplePruner); + opToSamplePruner, inputs, rootTasks); } @SuppressWarnings("nls") @@ -6623,7 +6623,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { opToPartList, topOps, topSelOps, opParseCtx, joinContext, topToTable, loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, - opToSamplePruner); + opToSamplePruner,inputs, rootTasks); Optimizer optm = new Optimizer(); optm.setPctx(pCtx); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index 73391e9..6e661e4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -70,6 +70,7 @@ public class MapredWork implements Serializable { private MapredLocalWork mapLocalWork; private String inputformat; + private String indexIntermediateFile; private boolean gatheringStats; private String tmpHDFSFileURI; @@ -363,10 +364,18 @@ public class MapredWork implements Serializable { return inputformat; } + public String getIndexIntermediateFile() { + return indexIntermediateFile; + } + public void setInputformat(String inputformat) { this.inputformat = inputformat; } + public void setIndexIntermediateFile(String fileName) { + this.indexIntermediateFile = fileName; + } + public void setGatheringStats(boolean gatherStats) { this.gatheringStats = gatherStats; } diff --git ql/src/test/queries/clientpositive/index_auto.q ql/src/test/queries/clientpositive/index_auto.q new file mode 100644 index 0000000..36ab038 --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto.q @@ -0,0 +1,28 @@ +-- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key; + +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD; +ALTER INDEX src_index ON src REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; + +-- manual indexing +INSERT OVERWRITE DIRECTORY "/tmp/index_where" SELECT `_bucketname` , `_offsets` FROM default__src_src_index__ WHERE key > 80 AND key < 100; +SET hive.index.compact.file=/tmp/index_where; +SET hive.optimize.index.filter=false; +SET 
hive.input.format=org.apache.hadoop.hive.ql.index.compact.HiveCompactIndexInputFormat; + +EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key; +SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minSize=0; + +-- automatic indexing +EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key; +SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key; + +DROP INDEX src_index on src; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/index_auto_file_format.q ql/src/test/queries/clientpositive/index_auto_file_format.q new file mode 100644 index 0000000..f43b57e --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto_file_format.q @@ -0,0 +1,19 @@ +-- test automatic use of index on different file formats +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD; +ALTER INDEX src_index ON src REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minSize=0; + +EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key; +SELECT key, value FROM src WHERE key=86 ORDER BY key; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minSize=0; + +EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key; +SELECT key, value FROM src WHERE key=86 ORDER BY key; + +DROP INDEX src_index on src; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/index_auto_multiple.q ql/src/test/queries/clientpositive/index_auto_multiple.q new file mode 100644 index 0000000..cf2818e --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto_multiple.q @@ -0,0 +1,14 @@ +-- With multiple indexes, make sure we choose which to use in a consistent order + +CREATE INDEX src_key_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD; +CREATE INDEX src_val_index ON TABLE src(value) as 'COMPACT' WITH DEFERRED REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minSize=0; + +EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key; +SELECT key, value FROM src WHERE key=86 ORDER BY key; + +DROP INDEX src_key_index ON src; +DROP INDEX src_val_index ON src; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/index_auto_partitioned.q ql/src/test/queries/clientpositive/index_auto_partitioned.q new file mode 100644 index 0000000..c79fd50 --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto_partitioned.q @@ -0,0 +1,12 @@ +-- test automatic use of index on table with partitions +CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD; +ALTER INDEX src_part_index ON srcpart REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minSize=0; + +EXPLAIN SELECT key, value FROM srcpart WHERE key=86 ORDER BY key; +SELECT key, value FROM srcpart WHERE key=86 ORDER BY key; + +DROP INDEX src_part_index ON srcpart; diff --git ql/src/test/queries/clientpositive/index_auto_unused.q ql/src/test/queries/clientpositive/index_auto_unused.q new file mode 100644 index 0000000..920cd70 --- 
/dev/null +++ ql/src/test/queries/clientpositive/index_auto_unused.q @@ -0,0 +1,46 @@ +-- test cases where the index should not be used automatically + +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD; +ALTER INDEX src_index ON src REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minSize=5368709120; +SET hive.optimize.index.filter.compact.maxSize=-1; + +-- min size too large (src is less than 5G) +EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key; +SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minSize=0; +SET hive.optimize.index.filter.compact.maxSize=1; + +-- max size too small +EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key; +SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minSize=0; +SET hive.optimize.index.filter.compact.maxSize=-1; + +-- OR predicate not supported by compact indexes +EXPLAIN SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key; +SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key; + + SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minSize=0; +SET hive.optimize.index.filter.compact.maxSize=-1; + +-- columns are not covered by indexes +DROP INDEX src_index on src; +CREATE INDEX src_val_index ON TABLE src(value) as 'COMPACT' WITH DEFERRED REBUILD; +ALTER INDEX src_val_index ON src REBUILD; + +EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key; +SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key; + +DROP INDEX src_val_index on src; \ No newline at end of file diff --git ql/src/test/results/clientpositive/index_auto.q.out ql/src/test/results/clientpositive/index_auto.q.out new file mode 100644 index 0000000..713bb40 --- /dev/null +++ ql/src/test/results/clientpositive/index_auto.q.out @@ -0,0 +1,313 @@ +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-48-49_890_8915143803732182055/-mr-10000 +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-48-49_890_8915143803732182055/-mr-10000 +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_index 
ON src REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src_index__ +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_index__ +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: -- manual indexing +INSERT OVERWRITE DIRECTORY "/tmp/index_where" SELECT `_bucketname` , `_offsets` FROM default__src_src_index__ WHERE key > 80 AND key < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_index__ +PREHOOK: Output: /tmp/index_where +POSTHOOK: query: -- manual indexing +INSERT OVERWRITE DIRECTORY "/tmp/index_where" SELECT `_bucketname` , `_offsets` FROM default__src_src_index__ WHERE key > 80 AND key < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src_index__ +POSTHOOK: Output: /tmp/index_where +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-49-24_225_5627614687777405478/-mr-10000 +POSTHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-49-24_225_5627614687777405478/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- automatic indexing +EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- automatic indexing +EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-6 depends on stages: Stage-3 , consists of Stage-5, Stage-4 + Stage-5 + Stage-2 depends on stages: Stage-5, Stage-4 + Stage-1 depends on stages: Stage-2 + Stage-4 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + default__src_src_index__ + TableScan + alias: default__src_src_index__ + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offsets + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-6 + Conditional Operator + + Stage: Stage-5 + Move Operator + files: + hdfs directory: true + destination: file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-49-31_422_3293105246070967294/-ext-10000 + + Stage: Stage-2 + Move Operator + files: + hdfs directory: true + destination: 
file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-49-31_020_3781399652075665616/-mr-10002 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-49-31_422_3293105246070967294/-ext-10001 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_index__ +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-49-31_665_4650797773210786014/-mr-10000 +POSTHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src_index__ +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-49-31_665_4650797773210786014/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: DROP INDEX src_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/index_auto_file_format.q.out ql/src/test/results/clientpositive/index_auto_file_format.q.out new file mode 100644 index 0000000..894a556 --- /dev/null +++ ql/src/test/results/clientpositive/index_auto_file_format.q.out @@ -0,0 +1,292 @@ +PREHOOK: query: -- test automatic use of index on different file formats +CREATE INDEX src_index ON 
TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: -- test automatic use of index on different file formats +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_index ON src REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src_index__ +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_index__ +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-6 depends on stages: Stage-3 , consists of Stage-5, Stage-4 + Stage-5 + Stage-2 depends on stages: Stage-5, Stage-4 + Stage-1 depends on stages: Stage-2 + Stage-4 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + default__src_src_index__ + TableScan + alias: default__src_src_index__ + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offsets + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-6 + Conditional Operator + + Stage: Stage-5 + Move Operator + files: + hdfs directory: true + destination: file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-34-34_553_2035287069819677240/-ext-10000 + + Stage: Stage-2 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-34-33_550_5081238212103104306/-mr-10002 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 
+ Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-34-34_553_2035287069819677240/-ext-10001 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_index__ +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-34-35_027_2206378732102210462/-mr-10000 +POSTHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src_index__ +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-34-35_027_2206378732102210462/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +86 val_86 +PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-6 depends on stages: Stage-3 , consists of Stage-5, Stage-4 + Stage-5 + Stage-2 depends on stages: Stage-5, Stage-4 + Stage-1 depends on stages: Stage-2 + Stage-4 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + default__src_src_index__ + TableScan + alias: default__src_src_index__ + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offsets + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-6 + Conditional Operator + + Stage: Stage-5 + Move Operator + files: + hdfs directory: true + destination: file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-34-55_926_5572346744193019133/-ext-10000 + + Stage: Stage-2 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-34-55_774_5724664415454979907/-mr-10002 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-34-55_926_5572346744193019133/-ext-10001 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_index__ +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-34-56_142_8911654156491576181/-mr-10000 +POSTHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src_index__ +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-34-56_142_8911654156491576181/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +86 val_86 +PREHOOK: query: DROP INDEX src_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/index_auto_multiple.q.out ql/src/test/results/clientpositive/index_auto_multiple.q.out new file mode 100644 index 0000000..891e6a4 --- /dev/null +++ 
ql/src/test/results/clientpositive/index_auto_multiple.q.out @@ -0,0 +1,148 @@ +PREHOOK: query: -- With multiple indexes, make sure we choose which to use in a consistent order + +CREATE INDEX src_key_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: -- With multiple indexes, make sure we choose which to use in a consistent order + +CREATE INDEX src_key_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: CREATE INDEX src_val_index ON TABLE src(value) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX src_val_index ON TABLE src(value) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-6 depends on stages: Stage-3 , consists of Stage-5, Stage-4 + Stage-5 + Stage-2 depends on stages: Stage-5, Stage-4 + Stage-1 depends on stages: Stage-2 + Stage-4 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + default__src_src_key_index__ + TableScan + alias: default__src_src_key_index__ + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offsets + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-6 + Conditional Operator + + Stage: Stage-5 + Move Operator + files: + hdfs directory: true + destination: file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-38-43_244_5558526114306213706/-ext-10000 + + Stage: Stage-2 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-38-42_125_8958452215776000521/-mr-10002 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-38-43_244_5558526114306213706/-ext-10001 + File Output Operator + 
compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_key_index__ +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-38-43_535_3778249487824217559/-mr-10000 +POSTHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src_key_index__ +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-38-43_535_3778249487824217559/-mr-10000 +86 val_86 +PREHOOK: query: DROP INDEX src_key_index ON src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_key_index ON src +POSTHOOK: type: DROPINDEX +PREHOOK: query: DROP INDEX src_val_index ON src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_val_index ON src +POSTHOOK: type: DROPINDEX diff --git ql/src/test/results/clientpositive/index_auto_partitioned.q.out ql/src/test/results/clientpositive/index_auto_partitioned.q.out new file mode 100644 index 0000000..05cc84a --- /dev/null +++ ql/src/test/results/clientpositive/index_auto_partitioned.q.out @@ -0,0 +1,185 @@ +PREHOOK: query: -- test automatic use of index on table with partitions +CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: -- test automatic use of index on table with partitions +CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_part_index ON srcpart REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=11 +PREHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=12 +PREHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=11 +PREHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12 +POSTHOOK: query: ALTER INDEX src_part_index ON srcpart REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12).key 
SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: EXPLAIN SELECT key, value FROM srcpart WHERE key=86 ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, value FROM srcpart WHERE key=86 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-6 depends on stages: Stage-3 , consists of Stage-5, Stage-4 + Stage-5 + Stage-2 depends on stages: Stage-5, Stage-4 + Stage-1 depends on stages: Stage-2 + Stage-4 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + default__srcpart_src_part_index__ + TableScan + alias: default__srcpart_src_part_index__ + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offsets + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-6 + Conditional Operator + + Stage: Stage-5 + Move Operator + files: + hdfs directory: true + destination: file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-42-19_218_3331226633297238280/-ext-10000 + + Stage: Stage-2 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-42-17_934_3278921987899048358/-mr-10002 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + srcpart + TableScan + alias: srcpart + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_22-42-19_218_3331226633297238280/-ext-10001 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT key, value FROM srcpart WHERE key=86 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=11 +PREHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=12 +PREHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=11 +PREHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-42-19_839_770753444589987409/-mr-10000 +POSTHOOK: query: SELECT key, value FROM srcpart WHERE key=86 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_22-42-19_839_770753444589987409/-mr-10000 +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +86 val_86 +86 val_86 +86 val_86 +86 val_86 +PREHOOK: query: DROP INDEX src_part_index ON srcpart +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_part_index ON srcpart +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/index_auto_unused.q.out ql/src/test/results/clientpositive/index_auto_unused.q.out new file mode 100644 index 0000000..976afad --- /dev/null +++ ql/src/test/results/clientpositive/index_auto_unused.q.out @@ -0,0 +1,448 @@ +PREHOOK: query: -- test cases where the index should not be used automatically + +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: -- test cases where the index should not be used automatically + +CREATE INDEX src_index ON TABLE src(key) as 
'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_index ON src REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src_index__ +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_index__ +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: -- min size too large (src is less than 5G) +EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- min size too large (src is less than 5G) +EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_23-01-29_580_520259480275767143/-mr-10000 +POSTHOOK: query: SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_23-01-29_580_520259480275767143/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, 
comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- max size too small +EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- max size too small +EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_23-01-38_535_2742003636182871225/-mr-10000 +POSTHOOK: query: SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_23-01-38_535_2742003636182871225/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- OR predicate not supported by compact indexes +EXPLAIN SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key 
+PREHOOK: type: QUERY +POSTHOOK: query: -- OR predicate not supported by compact indexes +EXPLAIN SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (OR (< (TOK_TABLE_OR_COL key) 10) (> (TOK_TABLE_OR_COL key) 480))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: ((key < 10) or (key > 480)) + type: boolean + Filter Operator + predicate: + expr: ((key < 10) or (key > 480)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_23-01-45_144_1912442074119375716/-mr-10000 +POSTHOOK: query: SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_23-01-45_144_1912442074119375716/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +498 val_498 +498 val_498 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: -- columns are not covered by indexes +DROP INDEX src_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: -- columns are not covered by indexes +DROP INDEX src_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: 
default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE INDEX src_val_index ON TABLE src(value) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX src_val_index ON TABLE src(value) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: ALTER INDEX src_val_index ON src REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src_val_index__ +POSTHOOK: query: ALTER INDEX src_val_index ON src REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_val_index__ +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__src_src_val_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_val_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_val_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__src_src_val_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_val_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_val_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map 
Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_23-02-06_016_8559589878310100219/-mr-10000 +POSTHOOK: query: SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_23-02-06_016_8559589878310100219/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__src_src_val_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_val_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_val_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: DROP INDEX src_val_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_val_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__src_src_val_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_val_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_val_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/index_opt_where.q.out ql/src/test/results/clientpositive/index_opt_where.q.out new file mode 100644 index 0000000..e7f2a8e --- /dev/null +++ 
ql/src/test/results/clientpositive/index_opt_where.q.out @@ -0,0 +1,187 @@ +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing +SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/tmp/rmelick/hive_2011-03-31_20-46-05_776_3701467786693766961/-mr-10000 +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing +SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/tmp/rmelick/hive_2011-03-31_20-46-05_776_3701467786693766961/-mr-10000 +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_index ON src REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src_index__ +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_index__ +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE DIRECTORY "/tmp/index_where" SELECT `_bucketname` , `_offsets` FROM default__src_src_index__ WHERE key > 80 AND key < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_index__ +PREHOOK: Output: /tmp/index_where +POSTHOOK: query: INSERT OVERWRITE DIRECTORY "/tmp/index_where" SELECT `_bucketname` , `_offsets` FROM default__src_src_index__ WHERE key > 80 AND key < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src_index__ +POSTHOOK: Output: /tmp/index_where +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION 
(TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (AND (> (TOK_TABLE_OR_COL key) 80) (< (TOK_TABLE_OR_COL key) 100))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/tmp/rmelick/hive_2011-03-31_20-46-25_021_8458385140495465322/-mr-10000 +POSTHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/tmp/rmelick/hive_2011-03-31_20-46-25_021_8458385140495465322/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- test automatic usage of index in query +SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_index__ +PREHOOK: Input: default@src +PREHOOK: Output: file:/tmp/rmelick/hive_2011-03-31_20-46-28_200_3904715072658165263/-mr-10000 +POSTHOOK: query: -- test automatic usage of index in query +SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src_index__ +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/tmp/rmelick/hive_2011-03-31_20-46-28_200_3904715072658165263/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +82 val_82 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: DROP INDEX 
src_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/index_opt_where_partitioned.q.out ql/src/test/results/clientpositive/index_opt_where_partitioned.q.out new file mode 100644 index 0000000..c7fa5ca --- /dev/null +++ ql/src/test/results/clientpositive/index_opt_where_partitioned.q.out @@ -0,0 +1,185 @@ +PREHOOK: query: -- test automatic use of index on table with partitions +CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: -- test automatic use of index on table with partitions +CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_part_index ON srcpart REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=11 +PREHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=12 +PREHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=11 +PREHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12 +POSTHOOK: query: ALTER INDEX src_part_index ON srcpart REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: EXPLAIN SELECT key, value FROM srcpart WHERE key=86 ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT key, value FROM srcpart WHERE key=86 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, 
type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1 , consists of Stage-3, Stage-6
+  Stage-3
+  Stage-7 depends on stages: Stage-3, Stage-6
+  Stage-8 depends on stages: Stage-7
+  Stage-6
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        default__srcpart_src_part_index__
+          TableScan
+            alias: default__srcpart_src_part_index__
+            Filter Operator
+              predicate:
+                  expr: (key = 86)
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (key = 86)
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: _bucketname
+                        type: string
+                        expr: _offsets
+                        type: array<bigint>
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-2
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_00-34-18_135_8084893435580971559/-ext-10000
+
+  Stage: Stage-7
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_00-34-17_012_8101433970073370450/-mr-10002
+
+  Stage: Stage-8
+    Map Reduce
+      Alias -> Map Operator Tree:
+        srcpart
+          TableScan
+            alias: srcpart
+            Filter Operator
+              predicate:
+                  expr: (key = 86)
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (key = 86)
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: key
+                        type: string
+                        expr: value
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                    sort order: +
+                    tag: -1
+                    value expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_00-34-18_135_8084893435580971559/-ext-10001
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT key, value FROM srcpart WHERE key=86 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=11
+PREHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=12
+PREHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=11
+PREHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_00-34-18_783_821789030824931676/-mr-10000
+POSTHOOK: query: SELECT key, value FROM srcpart WHERE key=86 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_00-34-18_783_821789030824931676/-mr-10000
+POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+86 val_86
+86 val_86
+86 val_86
+86 val_86
+PREHOOK: query: DROP INDEX src_part_index ON srcpart
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX src_part_index ON srcpart
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
diff --git ql/src/test/results/clientpositive/index_opt_where_simple.q.out ql/src/test/results/clientpositive/index_opt_where_simple.q.out
new file mode 100644
index 0000000..1cad36a
--- /dev/null
+++ ql/src/test/results/clientpositive/index_opt_where_simple.q.out
@@ -0,0 +1,376 @@
+PREHOOK: query: -- test automatic use of index and different file formats
+CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: -- test automatic use of index and different file formats
+CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+PREHOOK: query: ALTER INDEX src_index ON src REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@default__src_src_index__
+POSTHOOK: query: ALTER INDEX src_index ON src REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@default__src_src_index__
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: INSERT OVERWRITE DIRECTORY "/tmp/index_result_where1" SELECT `_bucketname` , `_offsets` FROM default__src_src_index__ WHERE key=86
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Output: /tmp/index_result_where1
+POSTHOOK: query: INSERT OVERWRITE DIRECTORY "/tmp/index_result_where1" SELECT `_bucketname` , `_offsets` FROM default__src_src_index__ WHERE key=86
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Output: /tmp/index_result_where1
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key = 86)
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (key = 86)
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: key
+                        type: string
+                        expr: value
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                    sort order: +
+                    tag: -1
+                    value expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_00-29-09_733_8163854871744736291/-mr-10000
+POSTHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_00-29-09_733_8163854871744736291/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+86 val_86
+PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1 , consists of Stage-3, Stage-6
+  Stage-3
+  Stage-7 depends on stages: Stage-3, Stage-6
+  Stage-8 depends on stages: Stage-7
+  Stage-6
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        default__src_src_index__
+          TableScan
+            alias: default__src_src_index__
+            Filter Operator
+              predicate:
+                  expr: (key = 86)
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (key = 86)
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: _bucketname
+                        type: string
+                        expr: _offsets
+                        type: array<bigint>
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-2
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_00-29-16_206_3979359743078885591/-ext-10000
+
+  Stage: Stage-7
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_00-29-15_992_6986222140422394063/-mr-10002
+
+  Stage: Stage-8
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key = 86)
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (key = 86)
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: key
+                        type: string
+                        expr: value
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                    sort order: +
+                    tag: -1
+                    value expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_00-29-16_206_3979359743078885591/-ext-10001
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_00-29-16_357_1908730236010739476/-mr-10000
+POSTHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_00-29-16_357_1908730236010739476/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+86 val_86
+PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key=86 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1 , consists of Stage-3, Stage-6
+  Stage-3
+  Stage-7 depends on stages: Stage-3, Stage-6
+  Stage-8 depends on stages: Stage-7
+  Stage-6
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        default__src_src_index__
+          TableScan
+            alias: default__src_src_index__
+            Filter Operator
+              predicate:
+                  expr: (key = 86)
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (key = 86)
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: _bucketname
+                        type: string
+                        expr: _offsets
+                        type: array<bigint>
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-2
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_00-29-30_208_6039990978256232577/-ext-10000
+
+  Stage: Stage-7
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_00-29-30_053_4054856220568922842/-mr-10002
+
+  Stage: Stage-8
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key = 86)
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (key = 86)
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: key
+                        type: string
+                        expr: value
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                    sort order: +
+                    tag: -1
+                    value expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/rmelick/hive/build/ql/scratchdir/hive_2011-04-15_00-29-30_208_6039990978256232577/-ext-10001
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_00-29-31_079_3792436457335490072/-mr-10000
+POSTHOOK: query: SELECT key, value FROM src WHERE key=86 ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/D-/D-v4r3JYHU4wWmtdk5+mbU+++TI/-Tmp-/rmelick/hive_2011-04-15_00-29-31_079_3792436457335490072/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+86 val_86
+PREHOOK: query: DROP INDEX src_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX src_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]