Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 1172965)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -376,6 +376,7 @@
     HIVEOPTPPD("hive.optimize.ppd", true), // predicate pushdown
     HIVEPPDRECOGNIZETRANSITIVITY("hive.ppd.recognizetransivity", true), // predicate pushdown
     HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true),
+    HIVEMETADATAONLYQUERIES("hive.optimize.metadataonly", true),
     // push predicates down to storage handlers
     HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true),
     HIVEOPTGROUPBY("hive.optimize.groupby", true), // optimize group by
Index: serde/src/java/org/apache/hadoop/hive/serde2/NullStructSerDe.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/NullStructSerDe.java	(revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/NullStructSerDe.java	(revision 0)
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Placeholder SerDe for cases where neither serialization nor deserialization is needed.
+ */
+public class NullStructSerDe implements SerDe {
+
+  class NullStructField implements StructField {
+    @Override
+    public String getFieldName() {
+      return null;
+    }
+
+    @Override
+    public ObjectInspector getFieldObjectInspector() {
+      return null;
+    }
+
+    @Override
+    public String getFieldComment() {
+      return "";
+    }
+  }
+
+  @Override
+  public Object deserialize(Writable blob) throws SerDeException {
+    return null;
+  }
+
+  @Override
+  public ObjectInspector getObjectInspector() throws SerDeException {
+    return new StructObjectInspector() {
+      public String getTypeName() {
+        return "null";
+      }
+      public Category getCategory() {
+        return Category.PRIMITIVE;
+      }
+      @Override
+      public StructField getStructFieldRef(String fieldName) {
+        return null;
+      }
+      @Override
+      public List getAllStructFieldRefs() {
+        return new ArrayList();
+      }
+      @Override
+      public Object getStructFieldData(Object data, StructField fieldRef) {
+        return null;
+      }
+      @Override
+      public List getStructFieldsDataAsList(Object data) {
+        return new ArrayList();
+      }
+    };
+  }
+
+  @Override
+  public SerDeStats getSerDeStats() {
+    return null;
+  }
+
+  @Override
+  public void initialize(Configuration conf, Properties tbl) throws SerDeException {
+  }
+
+  @Override
+  public Class getSerializedClass() {
+    return NullWritable.class;
+  }
+
+  @Override
+  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+    return NullWritable.get();
+  }
+
+}
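For context, a minimal sketch of how this placeholder SerDe behaves (illustrative only, not part of the patch): every deserialized row is null and serialization always yields a NullWritable, so no real (de)serialization work happens on the metadata-only path.

// Illustrative sketch; uses only the class defined above plus standard Hadoop types.
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.NullStructSerDe;
import org.apache.hadoop.io.NullWritable;

public class NullStructSerDeDemo {
  public static void main(String[] args) throws Exception {
    NullStructSerDe serde = new NullStructSerDe();
    serde.initialize(new Configuration(), new Properties()); // no-op
    Object row = serde.deserialize(NullWritable.get());      // always null
    System.out.println(row);                                 // prints "null"
    System.out.println(serde.serialize(row, null));          // NullWritable, prints "(null)"
  }
}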
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java	(working copy)
@@ -166,6 +166,15 @@
   public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
       HiveConf conf, String alias,
       Map prunedPartitionsMap) throws HiveException {
+    if (tab == null) {
+      // This is the case for SELECT ... FROM DUAL.
+      // We do not cache the returned value: it is cheap to compute, and object
+      // identity between calls is not required.
+      LinkedHashSet true_parts = new LinkedHashSet();
+      LinkedHashSet unkn_parts = new LinkedHashSet();
+      LinkedHashSet denied_parts = new LinkedHashSet();
+      return new PrunedPartitionList(true_parts, unkn_parts, denied_parts);
+    }
     LOG.trace("Started pruning partiton");
     LOG.trace("dbname = " + tab.getDbName());
     LOG.trace("tabname = " + tab.getTableName());
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java	(working copy)
@@ -24,6 +24,7 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.optimizer.index.RewriteGBUsingIndex;
 import org.apache.hadoop.hive.ql.optimizer.lineage.Generator;
+import org.apache.hadoop.hive.ql.optimizer.moq.MetadataOnlyOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.pcr.PartitionConditionRemover;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcessor;
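The hunk above adds the MetadataOnlyOptimizer import (and HiveConf gains hive.optimize.metadataonly), but the change that registers the transform inside Optimizer.initialize() is not shown in this diff. Presumably it follows the same pattern as the other flag-guarded transforms; a hypothetical sketch:

// Hypothetical registration sketch; the corresponding hunk is not part of the shown diff.
// Optimizer.initialize(HiveConf) guards each transform with its HiveConf flag, e.g.:
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
  transformations.add(new MetadataOnlyOptimizer());
}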
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/moq/MetadataOnlyOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/moq/MetadataOnlyOptimizer.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/moq/MetadataOnlyOptimizer.java	(revision 0)
@@ -0,0 +1,145 @@
+package org.apache.hadoop.hive.ql.optimizer.moq;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.optimizer.Transform;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * MetadataOnlyOptimizer determines the TableScanOperators to which the "metadata only"
+ * optimization can be applied. Such an operator must read only partition columns (this is
+ * easy to check because we run after column pruning), and every place that consumes the
+ * operator's output must go through a GroupByOperator with distinct or distinct-like
+ * aggregations. An aggregation is distinct-like if adding DISTINCT would not change its
+ * result, for example min and max.
+ *
+ * Without a group by we cannot apply the optimization, because the result would depend on
+ * the number of rows in each partition; for example, count(hr) counts all rows in the
+ * matching partitions.
+ */
+public class MetadataOnlyOptimizer implements Transform {
+  private static final Log LOG = LogFactory.getLog(MetadataOnlyOptimizer.class.getName());
+
+  static private class WalkerCtx implements NodeProcessorCtx {
+    /* operators for which there is a chance the optimization can be applied */
+    private final HashSet possible = new HashSet();
+    /* operators which we consider not applicable for the optimization */
+    private final HashSet banned = new HashSet();
+
+    /**
+     * Marks the operator as one to which the optimization may be applicable.
+     *
+     * @param op the operator
+     */
+    void setMayBeMetadataOnly(TableScanOperator op) {
+      if (!banned.contains(op)) {
+        possible.add(op);
+      }
+    }
+
+    /**
+     * Marks the operator as one to which the optimization cannot be applied.
+     *
+     * @param op the operator
+     */
+    void setIsNotMetadataOnly(TableScanOperator op) {
+      possible.remove(op);
+      banned.add(op);
+    }
+
+    /**
+     * Returns the set of collected operators for which the optimization is applicable.
+     */
+    HashSet getMetadataOnlyTableScans() {
+      return possible;
+    }
+
+  }
+
+  static private class TableScanProcessor implements NodeProcessor {
+    public TableScanProcessor() {
+    }
+
+    @Override
+    public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      TableScanOperator node = (TableScanOperator) nd;
+      WalkerCtx walkerCtx = (WalkerCtx) procCtx;
+      if (node.getNeededColumnIDs().size() == 0) {
+        // getNeededColumnIDs returns only data columns, so an empty list means
+        // the scan reads at most partition columns
+        walkerCtx.setMayBeMetadataOnly(node);
+      }
+      return nd;
+    }
+  }
+
+  static private class FileSinkProcessor implements NodeProcessor {
+    @Override
+    public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      boolean metGby = false;
+      WalkerCtx walkerCtx = (WalkerCtx) procCtx;
+      TableScanOperator tso = (TableScanOperator) stack.get(0);
+      assert (tso != null);
+      for (Node op : stack) {
+        if (op instanceof GroupByOperator) {
+          metGby = true;
+          GroupByOperator gby = (GroupByOperator) op;
+          if (!gby.isDistinctLike()) {
+            // the group by is not distinct-like, so disable the optimization
+            walkerCtx.setIsNotMetadataOnly(tso);
+          }
+        }
+      }
+      if (!metGby) {
+        // no aggregation at all, so disable the optimization
+        walkerCtx.setIsNotMetadataOnly(tso);
+      }
+
+      return nd;
+    }
+  }
+
+  @Override
+  public ParseContext transform(ParseContext pctx) throws SemanticException {
+    LOG.info("Looking for table scans where the optimization is applicable");
+    // create the context for walking operators
+    WalkerCtx walkerCtx = new WalkerCtx();
+
+    Map opRules = new LinkedHashMap();
+    opRules.put(new RuleRegExp("R1", "TS%"), new TableScanProcessor());
+    opRules.put(new RuleRegExp("R2", "FS%"), new FileSinkProcessor());
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, walkerCtx);
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+
+    // Create a list of top op nodes
+    ArrayList topNodes = new ArrayList();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+
+    LOG.info(String.format("Found %d metadata only table scans",
+        walkerCtx.getMetadataOnlyTableScans().size()));
+    pctx.setMetadataOnlyTableScans(walkerCtx.getMetadataOnlyTableScans());
+
+    return pctx;
+  }
+
+}
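To make the class comment concrete, assume a hypothetical table page_views partitioned by (ds, hr). The walker above would keep or drop the table scan for queries like these (illustrative strings only, not part of the patch):

// Illustrative only; page_views is a hypothetical table partitioned by (ds, hr).
String ok1 = "SELECT MAX(ds), MIN(ds) FROM page_views"; // distinct-like aggregates over a partition column
String ok2 = "SELECT DISTINCT ds FROM page_views";      // distinct over a partition column
String no1 = "SELECT COUNT(hr) FROM page_views";        // count is not distinct-like: result depends on row counts
String no2 = "SELECT ds, url FROM page_views";          // reads a data column (url) and has no group by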
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(working copy)
@@ -45,6 +45,7 @@
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx;
@@ -70,6 +71,7 @@
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
 import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext;
+import org.apache.hadoop.hive.serde2.NullStructSerDe;
 
 /**
  * General utility common functions for the Processor to convert operator into
@@ -582,9 +584,16 @@
 
     // The table does not have any partitions
     if (aliasPartnDesc == null) {
+      if (parseCtx.getTopToTable().get(topOp) == null) {
+        // The table can be null for SELECT ... FROM DUAL; in that case the
+        // TableScanOperator must provide the TableDesc itself.
+        TableScanOperator tso = (TableScanOperator) topOp;
+        assert tso.getTableDesc() != null;
+        aliasPartnDesc = new PartitionDesc(tso.getTableDesc(), null);
+      } else {
       aliasPartnDesc = new PartitionDesc(Utilities.getTableDesc(parseCtx
           .getTopToTable().get(topOp)), null);
-
+      }
     }
 
     plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);
@@ -616,11 +625,19 @@
     boolean isFirstPart = true;
     boolean emptyInput = true;
     boolean singlePartition = (parts.size() == 1);
+
+    boolean metadataOnly = false;
+    if (parseCtx.isMetadataOnlyTableScan((TableScanOperator) topOp)) {
+      metadataOnly = true;
+    }
+
     for (Partition part : parts) {
-      if (part.getTable().isPartitioned()) {
-        inputs.add(new ReadEntity(part));
-      } else {
-        inputs.add(new ReadEntity(part.getTable()));
+      if (!metadataOnly) {
+        if (part.getTable().isPartitioned()) {
+          inputs.add(new ReadEntity(part));
+        } else {
+          inputs.add(new ReadEntity(part.getTable()));
+        }
       }
 
       // Later the properties have to come from the partition as opposed
@@ -628,7 +645,17 @@
       Path[] paths = null;
       sampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(topOp);
 
-      if (sampleDescr != null) {
+      if (metadataOnly) {
+        Path[] allPaths = part.getPath();
+        if (allPaths.length > 0) {
+          String uriPath = "/fake-path" + allPaths[0].toUri().getPath();
+          Path fakePath = new Path("file", null, uriPath);
+          paths = new Path[] { fakePath };
+        } else {
+          paths = new Path[0];
+        }
+      }
+      else if (sampleDescr != null) {
        paths = SamplePruner.prune(part, sampleDescr);
        parseCtx.getGlobalLimitCtx().disableOpt();
       } else {
@@ -701,12 +728,19 @@
         LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
         throw new SemanticException(e.getMessage(), e);
       }
+      if (metadataOnly) {
+        PartitionDesc desc = partDesc.get(partDesc.size() - 1);
+        desc.setInputFileFormatClass(OneNullRowInputFormat.class);
+        desc.setDeserializerClass(NullStructSerDe.class);
+        desc.setSerdeClassName(NullStructSerDe.class.getName());
+      }
     }
 
     if (emptyInput) {
       parseCtx.getGlobalLimitCtx().disableOpt();
     }
+
     Iterator iterPath = partDir.iterator();
     Iterator iterPartnDesc = partDesc.iterator();
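The fake-path branch above keys each metadata-only partition by a synthetic file: URI derived from the real partition location, so the entry stays unique per partition without pointing at real data. A standalone sketch of the same construction, using a hypothetical partition location:

// Illustrative sketch of the fake-path construction used above.
import org.apache.hadoop.fs.Path;

public class FakePathDemo {
  public static void main(String[] args) {
    // Hypothetical partition location.
    Path partPath = new Path("hdfs://namenode:8020/user/hive/warehouse/page_views/ds=2011-09-20");
    String uriPath = "/fake-path" + partPath.toUri().getPath();
    Path fakePath = new Path("file", null, uriPath);
    // Prints: file:/fake-path/user/hive/warehouse/page_views/ds=2011-09-20
    System.out.println(fakePath);
  }
}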
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java	(working copy)
@@ -137,6 +137,10 @@
       // Table scan operator.
       TableScanOperator top = (TableScanOperator)nd;
       org.apache.hadoop.hive.ql.metadata.Table t = pctx.getTopToTable().get(top);
+      if (t == null) {
+        // The table can be null for SELECT ... FROM DUAL.
+        return null;
+      }
       Table tab = t.getTTable();
 
       // Generate the mappings
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java	(working copy)
@@ -42,6 +42,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.UDFType;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
 import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
@@ -1047,6 +1048,24 @@
   }
 
   /**
+   * Checks whether this grouping is distinct-like, i.e. all non-distinct aggregations
+   * behave as if they were distinct - for example min and max.
+   */
+  public boolean isDistinctLike() {
+    ArrayList aggregators = conf.getAggregators();
+    for (AggregationDesc ad : aggregators) {
+      if (!ad.getDistinct()) {
+        GenericUDAFEvaluator udafEval = ad.getGenericUDAFEvaluator();
+        UDFType annot = udafEval.getClass().getAnnotation(UDFType.class);
+        if (annot == null || !annot.distinctLike()) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  /**
    * @return the name of the operator
    */
   @Override
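To spell out what isDistinctLike() is testing: an aggregation is distinct-like when duplicate input values cannot change its result. A small self-contained illustration with plain Java collections (no Hive classes involved):

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class DistinctLikeDemo {
  public static void main(String[] args) {
    List<Integer> rows = Arrays.asList(3, 3, 3, 7, 7);   // duplicate values, e.g. many rows per partition
    Set<Integer> distinct = new HashSet<Integer>(rows);

    // min and max are distinct-like: duplicates cannot change their result.
    System.out.println(Collections.min(rows).equals(Collections.min(distinct))); // true
    System.out.println(Collections.max(rows).equals(Collections.max(distinct))); // true

    // count is not distinct-like: it depends on how many rows there are.
    System.out.println(rows.size() == distinct.size());                          // false
  }
}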
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredMetadataWork.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredMetadataWork.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredMetadataWork.java	(revision 0)
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+import java.util.List;
+
+public class MapredMetadataWork implements Serializable {
+  private static final long serialVersionUID = 7588758198768362051L;
+  private List partitionsNames;
+
+  public MapredMetadataWork() {
+  }
+
+  public MapredMetadataWork(MapredMetadataWork clone) {
+    partitionsNames = clone.partitionsNames;
+  }
+
+  public MapredMetadataWork(List partitionsNames) {
+    this.partitionsNames = partitionsNames;
+  }
+
+  public List getPartitionsNames() {
+    return partitionsNames;
+  }
+
+  public void setPartitionsNames(List partitionsNames) {
+    this.partitionsNames = partitionsNames;
+  }
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java	(working copy)
@@ -75,6 +75,8 @@
   private boolean hadoopSupportsSplittable;
 
   private MapredLocalWork mapLocalWork;
+  private MapredMetadataWork mapredMetadataWork;
+
   private String inputformat;
   private String indexIntermediateFile;
   private boolean gatheringStats;
@@ -99,6 +101,7 @@
       final TableDesc keyDesc, List tagToValueDesc,
       final Operator reducer, final Integer numReduceTasks,
       final MapredLocalWork mapLocalWork,
+      final MapredMetadataWork mapredMetadataWork,
       final boolean hadoopSupportsSplittable) {
     this.command = command;
     this.pathToAliases = pathToAliases;
@@ -109,6 +112,7 @@
     this.reducer = reducer;
     this.numReduceTasks = numReduceTasks;
     this.mapLocalWork = mapLocalWork;
+    this.mapredMetadataWork = mapredMetadataWork;
     aliasToPartnInfo = new LinkedHashMap();
     this.hadoopSupportsSplittable = hadoopSupportsSplittable;
     maxSplitSize = null;
@@ -187,6 +191,14 @@
     this.mapLocalWork = mapLocalWork;
   }
 
+  public MapredMetadataWork getMapredMetadataWork() {
+    return mapredMetadataWork;
+  }
+
+  public void setMapredMetadataWork(MapredMetadataWork mapredMetadataWork) {
+    this.mapredMetadataWork = mapredMetadataWork;
+  }
+
   public TableDesc getKeyDesc() {
     return keyDesc;
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java	(working copy)
@@ -84,7 +84,7 @@
           new LinkedHashMap(),
           new LinkedHashMap>(), new TableDesc(),
           new ArrayList(), null, Integer.valueOf(1),
-          null, Hive.get().getConf().getBoolVar(
+          null, null, Hive.get().getConf().getBoolVar(
           HiveConf.ConfVars.HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE));
     } catch (HiveException ex) {
       throw new RuntimeException(ex);
Index: ql/src/java/org/apache/hadoop/hive/ql/io/OneNullRowInputFormat.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/OneNullRowInputFormat.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/OneNullRowInputFormat.java	(revision 0)
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * OneNullRowInputFormat outputs exactly one null row. It is used to implement
+ * metadata-only queries.
+ */
+public class OneNullRowInputFormat implements
+    InputFormat, JobConfigurable {
+  private static final Log LOG = LogFactory.getLog(OneNullRowInputFormat.class.getName());
+  MapredWork mrwork = null;
+  List partitions;
+  long len;
+
+  static public class DummyInputSplit implements InputSplit {
+    public DummyInputSplit() {
+    }
+
+    @Override
+    public long getLength() throws IOException {
+      return 1;
+    }
+
+    @Override
+    public String[] getLocations() throws IOException {
+      return new String[0];
+    }
+
+    @Override
+    public void readFields(DataInput arg0) throws IOException {
+    }
+
+    @Override
+    public void write(DataOutput arg0) throws IOException {
+    }
+
+  }
+
+  static public class OneNullRowRecordReader implements RecordReader {
+    private boolean processed = false;
+
+    public OneNullRowRecordReader() {
+    }
+
+    @Override
+    public void close() throws IOException {
+    }
+
+    @Override
+    public NullWritable createKey() {
+      return NullWritable.get();
+    }
+
+    @Override
+    public NullWritable createValue() {
+      return NullWritable.get();
+    }
+
+    @Override
+    public long getPos() throws IOException {
+      return (processed ? 1 : 0);
+    }
+
+    @Override
+    public float getProgress() throws IOException {
+      return (float) (processed ? 1.0 : 0.0);
+    }
+
+    @Override
+    public boolean next(NullWritable arg0, NullWritable arg1) throws IOException {
+      if (processed) {
+        return false;
+      } else {
+        processed = true;
+        return true;
+      }
+    }
+
+  }
+
+  @Override
+  public RecordReader getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
+      throws IOException {
+    return new OneNullRowRecordReader();
+  }
+
+  @Override
+  public InputSplit[] getSplits(JobConf arg0, int arg1) throws IOException {
+    InputSplit[] ret = new InputSplit[1];
+    ret[0] = new DummyInputSplit();
+    LOG.info("Calculating splits");
+    return ret;
+  }
+
+  @Override
+  public void configure(JobConf job) {
+    LOG.info("Using one null row input format");
+  }
+
+}
\ No newline at end of file
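A short sketch (not part of the patch) that drives the record reader directly and shows the contract the metadata-only plan relies on: exactly one (NullWritable, NullWritable) pair per split.

// Illustrative sketch; uses the classes defined in the patch above.
import java.io.IOException;
import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat.OneNullRowRecordReader;
import org.apache.hadoop.io.NullWritable;

public class OneNullRowDemo {
  public static void main(String[] args) throws IOException {
    OneNullRowRecordReader reader = new OneNullRowRecordReader();
    NullWritable key = reader.createKey();
    NullWritable value = reader.createValue();
    int rows = 0;
    while (reader.next(key, value)) {
      rows++;
    }
    reader.close();
    System.out.println(rows); // prints 1: one null row per DummyInputSplit
  }
}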
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g	(working copy)
@@ -80,6 +80,7 @@
 TOK_DISTRIBUTEBY;
 TOK_SORTBY;
 TOK_UNION;
+TOK_DUAL;
 TOK_JOIN;
 TOK_LEFTOUTERJOIN;
 TOK_RIGHTOUTERJOIN;
@@ -1591,7 +1592,8 @@
 joinSource
 @init { msgs.push("join source"); }
 @after { msgs.pop(); }
-    : fromSource ( joinToken^ fromSource (KW_ON! expression)? )*
+    : KW_DUAL -> TOK_DUAL
+    | fromSource ( joinToken^ fromSource (KW_ON! expression)? )*
     | uniqueJoinToken^ uniqueJoinSource (COMMA! uniqueJoinSource)+
     ;
 
@@ -2129,6 +2131,7 @@
 KW_OUTER : 'OUTER';
 KW_UNIQUEJOIN : 'UNIQUEJOIN';
 KW_PRESERVE : 'PRESERVE';
+KW_DUAL: 'DUAL';
 KW_JOIN : 'JOIN';
 KW_INNER : 'INNER';
 KW_LEFT : 'LEFT';
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java	(working copy)
@@ -20,6 +20,7 @@
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
@@ -42,6 +43,7 @@
   private int numSelDi = 0;
   private HashMap aliasToTabs;
   private HashMap aliasToSubq;
+  private HashSet aliasDuals;
   private List aliases;
   private QBParseInfo qbp;
   private QBMetaData qbm;
@@ -68,6 +70,7 @@
   public QB(String outer_id, String alias, boolean isSubQ) {
     aliasToTabs = new HashMap();
     aliasToSubq = new HashMap();
+    aliasDuals = new HashSet();
     aliases = new ArrayList();
     if (alias != null) {
       alias = alias.toLowerCase();
@@ -99,7 +102,8 @@
 
   public boolean exists(String alias) {
     alias = alias.toLowerCase();
-    if (aliasToTabs.get(alias) != null || aliasToSubq.get(alias) != null) {
+    if (aliasToTabs.get(alias) != null || aliasToSubq.get(alias) != null
+        || aliasDuals.contains(alias)) {
       return true;
     }
 
@@ -114,6 +118,10 @@
     aliasToSubq.put(alias.toLowerCase(), qbexpr);
   }
 
+  public void setDualAlias(String alias) {
+    aliasDuals.add(alias);
+  }
+
   public void addAlias(String alias) {
     if (!aliases.contains(alias.toLowerCase())) {
       aliases.add(alias.toLowerCase());
@@ -148,6 +156,10 @@
     return aliasToTabs.keySet();
   }
 
+  public Set getDualAliases() {
+    return aliasDuals;
+  }
+
   public List getAliases() {
     return aliases;
   }
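With the grammar change above, a bare DUAL after FROM is rewritten to a TOK_DUAL node instead of a TOK_TABREF. A hedged sketch of checking that through ParseDriver (assuming its usual parse(String) entry point):

// Hedged sketch: exercises the new grammar rule through ParseDriver.
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.ParseException;

public class DualParseDemo {
  public static void main(String[] args) throws ParseException {
    ParseDriver pd = new ParseDriver();
    ASTNode tree = pd.parse("SELECT 1 + 1 FROM DUAL");
    // The FROM clause should now contain a TOK_DUAL node instead of a TOK_TABREF.
    System.out.println(tree.toStringTree());
  }
}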
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -27,6 +27,7 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.Map.Entry;
@@ -72,7 +73,9 @@
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
+import org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -154,6 +157,7 @@
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
+import org.apache.hadoop.hive.serde2.NullStructSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
@@ -175,6 +179,7 @@
   private HashMap opToPartList;
   private HashMap> topOps;
   private HashMap> topSelOps;
+  private HashSet metadataOnlyTableScans;
   private LinkedHashMap, OpParseContext> opParseCtx;
   private List loadTableWork;
   private List loadFileWork;
@@ -261,6 +266,7 @@
     opToPartPruner = new HashMap();
     opToPartList = new HashMap();
     opToSamplePruner = new HashMap();
+    metadataOnlyTableScans = new HashSet();
     nameToSplitSample = new HashMap();
     topOps = new HashMap>();
     topSelOps = new HashMap>();
@@ -303,6 +309,7 @@
     opToPartPruner = pctx.getOpToPartPruner();
     opToPartList = pctx.getOpToPartList();
     opToSamplePruner = pctx.getOpToSamplePruner();
+    metadataOnlyTableScans = pctx.getMetadataOnlyTableScans();
     topOps = pctx.getTopOps();
     topSelOps = pctx.getTopSelOps();
     opParseCtx = pctx.getOpParseCtx();
@@ -325,7 +332,7 @@
         topSelOps, opParseCtx, joinContext, topToTable, loadTableWork,
         loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
         listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
-        opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks);
+        opToSamplePruner, metadataOnlyTableScans, globalLimitCtx, nameToSplitSample, inputs, rootTasks);
   }
 
   @SuppressWarnings("nls")
@@ -556,6 +563,20 @@
     return alias;
   }
 
+  /**
+   * Processes the DUAL clause (a one-row, zero-column virtual table) after FROM.
+   */
+  private String processDual(QB qb, ASTNode tabref) throws SemanticException {
+    ASTNode dualTree = (ASTNode) (tabref.getChild(0));
+    String alias = "dual";
+    if (!qb.exists(alias)) {
+      qb.addAlias(alias);
+      qb.setDualAlias(alias);
+      qb.getParseInfo().setSrcForAlias(alias, dualTree);
+    }
+    return alias;
+  }
+
   private String processSubQuery(QB qb, ASTNode subq) throws SemanticException {
 
     // This is a subquery and must have an alias
@@ -754,6 +775,8 @@
       ASTNode frm = (ASTNode) ast.getChild(0);
       if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
         processTable(qb, frm);
+      } else if (frm.getToken().getType() == HiveParser.TOK_DUAL) {
+        processDual(qb, frm);
       } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
         processSubQuery(qb, frm);
       } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
@@ -6366,6 +6389,59 @@
     return output;
   }
 
+
+  /**
+   * Generates and configures the TableScanOperator for the DUAL virtual table.
+   */
+  private Operator genDualPlan(String alias, QB qb) throws SemanticException {
+
+    String alias_id = getAliasId(alias, qb);
+    RowResolver rwsch;
+
+    // is it already present?
+    Operator top = topOps.get(alias_id);
+    Operator dummySel = topSelOps.get(alias_id);
+    if (dummySel != null) {
+      top = dummySel;
+    }
+
+    if (top == null) {
+      rwsch = new RowResolver();
+      RowSchema rs = new RowSchema(new ArrayList());
+      rwsch.setRowSchema(rs);
+
+      // should we add virtual columns for DUAL?
+      List vcList = new ArrayList();
+
+      // Create the root of the operator tree
+      TableScanDesc tsDesc = new TableScanDesc(alias, vcList);
+      //setupStats(tsDesc, qb.getParseInfo(), tab, alias, rwsch);
+
+      top = putOpInsertMap(OperatorFactory.get(tsDesc,
+          new RowSchema(rwsch.getColumnInfos())), rwsch);
+
+      Properties props = new Properties();
+      TableDesc tableDesc = new TableDesc(NullStructSerDe.class,
+          OneNullRowInputFormat.class, HiveNullValueSequenceFileOutputFormat.class,
+          props);
+      ((TableScanOperator) top).setTableDesc(tableDesc);
+
+      // Add this to the list of top operators - we always start from a table
+      // scan
+      topOps.put(alias_id, top);
+    } else {
+      rwsch = opParseCtx.get(top).getRowResolver();
+      top.setChildOperators(null);
+    }
+    Operator output = putOpInsertMap(top, rwsch);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Created Dual Plan for " + alias);
+    }
+
+    return output;
+  }
+
   private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab,
       String alias, RowResolver rwsch)
       throws SemanticException {
@@ -6456,6 +6532,11 @@
       aliasToOpInfo.put(alias, op);
     }
 
+    for (String alias : qb.getDualAliases()) {
+      Operator op = genDualPlan(alias, qb);
+      aliasToOpInfo.put(alias, op);
+    }
+
     // For all the source tables that have a lateral view, attach the
     // appropriate operators to the TS
     genLateralViewPlans(aliasToOpInfo, qb);
@@ -7247,7 +7328,7 @@
         opToPartList, topOps, topSelOps, opParseCtx, joinContext, topToTable,
         loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
         listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
-        opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks);
+        opToSamplePruner, metadataOnlyTableScans, globalLimitCtx, nameToSplitSample, inputs, rootTasks);
 
     Optimizer optm = new Optimizer();
     optm.setPctx(pCtx);
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java	(working copy)
@@ -61,6 +61,7 @@
   private HashMap opToPartPruner;
   private HashMap opToPartList;
   private HashMap opToSamplePruner;
+  private HashSet metadataOnlyTableScans;
   private HashMap> topOps;
   private HashMap> topSelOps;
   private LinkedHashMap, OpParseContext> opParseCtx;
@@ -155,6 +156,7 @@
       Map> groupOpToInputTables,
       Map prunedPartitions,
       HashMap opToSamplePruner,
+      HashSet metadataOnlyTableScans,
      SemanticAnalyzer.GlobalLimitCtx globalLimitCtx,
      HashMap nameToSplitSample,
      HashSet semanticInputs, List> rootTasks) {
@@ -179,6 +181,7 @@
     this.groupOpToInputTables = groupOpToInputTables;
     this.prunedPartitions = prunedPartitions;
     this.opToSamplePruner = opToSamplePruner;
+    this.metadataOnlyTableScans = metadataOnlyTableScans;
     this.nameToSplitSample = nameToSplitSample;
     this.globalLimitCtx = globalLimitCtx;
     this.semanticInputs = semanticInputs;
@@ -456,6 +459,17 @@
     this.opToSamplePruner = opToSamplePruner;
   }
 
+  public void setMetadataOnlyTableScans(HashSet metadataOnlyTableScans) {
+    this.metadataOnlyTableScans = metadataOnlyTableScans;
+  }
+
+  public HashSet getMetadataOnlyTableScans() {
+    return metadataOnlyTableScans;
+  }
+
+  public boolean isMetadataOnlyTableScan(TableScanOperator tos) {
+    return metadataOnlyTableScans != null && metadataOnlyTableScans.contains(tos);
+  }
   /**
    * @return the groupOpToInputTables
    */
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java	(working copy)
@@ -33,4 +33,5 @@
 public @interface UDFType {
   boolean deterministic() default true;
   boolean stateful() default false;
+  boolean distinctLike() default false;
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java	(working copy)
@@ -23,6 +23,7 @@
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
@@ -49,6 +50,7 @@
     return new GenericUDAFMaxEvaluator();
   }
 
+  @UDFType(distinctLike=true)
   public static class GenericUDAFMaxEvaluator extends GenericUDAFEvaluator {
 
     ObjectInspector inputOI;
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java	(revision 1172965)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java	(working copy)
@@ -23,6 +23,7 @@
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
@@ -49,6 +50,7 @@
     return new GenericUDAFMinEvaluator();
   }
 
+  @UDFType(distinctLike=true)
   public static class GenericUDAFMinEvaluator extends GenericUDAFEvaluator {
 
     ObjectInspector inputOI;
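For UDAF authors, the new distinctLike attribute is what GroupByOperator.isDistinctLike() looks for on the evaluator class. A hypothetical skeleton showing how another distinct-like aggregate could be marked (illustrative only; the evaluator and its logic are not part of the patch):

// Hypothetical example: a "logical OR" aggregate is distinct-like, because feeding it
// duplicate rows cannot change the result. Skeleton only; the usual GenericUDAFEvaluator
// methods (getNewAggregationBuffer, iterate, merge, terminate, ...) are elided.
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;

@UDFType(distinctLike = true)
public abstract class GenericUDAFBoolOrEvaluator extends GenericUDAFEvaluator {
  // implementation elided
}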