diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 367f056..e98c24f 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -546,6 +546,9 @@ public class HiveConf extends Configuration { HIVEOUTERJOINSUPPORTSFILTERS("hive.outerjoin.supports.filters", true), + // 'minimal', 'more' (and 'all' later) + HIVEFETCHTASKCONVERSION("hive.fetch.task.conversion", "minimal"), + // Serde for FetchTask HIVEFETCHOUTPUTSERDE("hive.fetch.output.serde", "org.apache.hadoop.hive.serde2.DelimitedJSONSerDe"), diff --git conf/hive-default.xml.template conf/hive-default.xml.template index 150bbeb..b2b3342 100644 --- conf/hive-default.xml.template +++ conf/hive-default.xml.template @@ -1389,5 +1389,17 @@ + + hive.fetch.task.conversion + minimal + + Some select queries can be converted to single FETCH task minimizing latency. + Currently the query should be single sourced not having any subquery and should not have + any aggregations or distincts (which incurrs RS), lateral views and joins. + 1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only + 2. more : SELECT, FILTER, LIMIT only (+TABLESAMPLE, virtual columns) + + + diff --git hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out index b35860f..e182e17 100644 --- hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out +++ hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out @@ -441,6 +441,17 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 + Processor Tree: + TableScan + alias: hbase_pushdown + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + ListSink PREHOOK: query: -- with a predicate which is not actually part of the filter, so diff --git hbase-handler/src/test/results/positive/hbase_pushdown.q.out hbase-handler/src/test/results/positive/hbase_pushdown.q.out index 45927dd..f55fa95 100644 --- hbase-handler/src/test/results/positive/hbase_pushdown.q.out +++ hbase-handler/src/test/results/positive/hbase_pushdown.q.out @@ -256,6 +256,17 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 + Processor Tree: + TableScan + alias: hbase_pushdown + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + ListSink PREHOOK: query: -- with a predicate which is not actually part of the filter, so diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java index 567aafa..c4421ba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -442,13 +442,6 @@ public class Driver implements CommandProcessor { sem.validate(); plan = new QueryPlan(command, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN)); - // initialize FetchTask right here - if (plan.getFetchTask() != null) { - plan.getFetchTask().initialize(conf, plan, null); - } - - // get the output schema - schema = getSchema(sem, conf); // test Only - serialize the query plan and deserialize it if ("true".equalsIgnoreCase(System.getProperty("test.serialize.qplan"))) { @@ -477,6 +470,9 @@ public class Driver implements CommandProcessor { plan.getFetchTask().initialize(conf, plan, null); } + // get the output schema + schema = getSchema(sem, conf); + //do the authorization check if 
(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java index a633314..c66cf0c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java @@ -68,7 +68,10 @@ public class ExecMapperContext { ioCxt = IOContext.get(); } - + public void clear() { + IOContext.clear(); + ioCxt = null; + } /** * For CompbineFileInputFormat, the mapper's input file will be changed on the @@ -174,5 +177,4 @@ public class ExecMapperContext { public void setIoCxt(IOContext ioCxt) { this.ioCxt = ioCxt; } - } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index 4af7a13..e7d4286 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -35,7 +35,12 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader; +import org.apache.hadoop.hive.ql.io.HiveInputFormat; +import org.apache.hadoop.hive.ql.io.HiveRecordReader; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; @@ -46,6 +51,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; @@ -66,40 +72,82 @@ public class FetchOperator implements Serializable { private boolean isNativeTable; private FetchWork work; + private Operator operator; // operator tree for processing row further (option) private int splitNum; private PartitionDesc currPart; private TableDesc currTbl; private boolean tblDataDone; + private boolean hasVC; + private boolean isPartitioned; + private StructObjectInspector vcsOI; + private List vcCols; + private ExecMapperContext context; + private transient RecordReader currRecReader; - private transient InputSplit[] inputSplits; + private transient FetchInputFormatSplit[] inputSplits; private transient InputFormat inputFormat; private transient JobConf job; private transient WritableComparable key; private transient Writable value; + private transient Writable[] vcValues; private transient Deserializer serde; private transient Iterator iterPath; private transient Iterator iterPartDesc; private transient Path currPath; private transient StructObjectInspector rowObjectInspector; - private transient Object[] rowWithPart; + private transient Object[] row; + public FetchOperator() { } public FetchOperator(FetchWork work, JobConf job) { + this.job = job; this.work = work; - initialize(job); + initialize(); } - 
public void initialize(JobConf job) { + public FetchOperator(FetchWork work, JobConf job, Operator operator, + List vcCols) { this.job = job; + this.work = work; + this.operator = operator; + this.vcCols = vcCols; + initialize(); + } + + private void initialize() { + if (hasVC = vcCols != null && !vcCols.isEmpty()) { + List names = new ArrayList(vcCols.size()); + List inspectors = new ArrayList(vcCols.size()); + for (VirtualColumn vc : vcCols) { + inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + vc.getTypeInfo().getPrimitiveCategory())); + names.add(vc.getName()); + } + vcsOI = ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors); + vcValues = new Writable[vcCols.size()]; + } + isPartitioned = work.isPartitioned(); tblDataDone = false; - rowWithPart = new Object[2]; - if (work.getTblDir() != null) { + if (hasVC && isPartitioned) { + row = new Object[3]; + } else if (hasVC || isPartitioned) { + row = new Object[2]; + } else { + row = new Object[1]; + } + if (work.getTblDesc() != null) { isNativeTable = !work.getTblDesc().isNonNative(); } else { isNativeTable = true; } + if (hasVC || work.getSplitSample() != null) { + context = new ExecMapperContext(); + if (operator != null) { + operator.setExecContext(context); + } + } } public FetchWork getWork() { @@ -151,6 +199,7 @@ public class FetchOperator implements Serializable { */ private static Map> inputFormats = new HashMap>(); + @SuppressWarnings("unchecked") static InputFormat getInputFormatFromCache(Class inputFormatClass, Configuration conf) throws IOException { if (!inputFormats.containsKey(inputFormatClass)) { @@ -166,17 +215,35 @@ public class FetchOperator implements Serializable { return inputFormats.get(inputFormatClass); } - private void setPrtnDesc(TableDesc table, Map partSpec) throws Exception { + private StructObjectInspector setPrtnDesc(TableDesc table) throws Exception { + Deserializer serde = table.getDeserializerClass().newInstance(); + serde.initialize(job, table.getProperties()); + return rowObjectInspector = createRowInspector(serde); + } + + private StructObjectInspector setPrtnDesc(TableDesc table, + Map partSpec) throws Exception { + Deserializer serde = table.getDeserializerClass().newInstance(); + serde.initialize(job, table.getProperties()); String pcols = table.getProperties().getProperty( org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS); String[] partKeys = pcols.trim().split("/"); if (partSpec != null) { - rowWithPart[1] = createPartValue(partKeys, partSpec); + row[1] = createPartValue(partKeys, partSpec); } - rowObjectInspector = createRowInspector(partKeys); + return rowObjectInspector = createRowInspector(serde, partKeys); } - private StructObjectInspector createRowInspector(String[] partKeys) throws SerDeException { + private StructObjectInspector createRowInspector(Deserializer serde) throws SerDeException { + StructObjectInspector inspector = (StructObjectInspector) serde.getObjectInspector(); + + return hasVC ? 
ObjectInspectorFactory.repackUnionStructObjectInspector( + (UnionStructObjectInspector) rowObjectInspector, Arrays.asList(inspector, vcsOI)) + : inspector; + } + + private StructObjectInspector createRowInspector(Deserializer serde, String[] partKeys) + throws SerDeException { List partNames = new ArrayList(); List partObjectInspectors = new ArrayList(); for (String key : partKeys) { @@ -187,8 +254,15 @@ public class FetchOperator implements Serializable { .getStandardStructObjectInspector(partNames, partObjectInspectors); StructObjectInspector inspector = (StructObjectInspector) serde.getObjectInspector(); - return ObjectInspectorFactory.getUnionStructObjectInspector( - Arrays.asList(inspector, partObjectInspector)); + // TableScanOperator in FetchTask (source) is initialized by first output OI, which is + // acquired from FetchOperator#getOutputObjectInspector. But this can be changed at runtime + // for partitioned table and the changed outputOI is returned with row for FetchTask. + // But for the operator tree, OIs cannot be forwarded with row. + // So just repacks inside of rowObjectInspector with changed OIs. + return ObjectInspectorFactory.repackUnionStructObjectInspector( + (UnionStructObjectInspector) rowObjectInspector, + hasVC ? Arrays.asList(inspector, partObjectInspector, vcsOI) : + Arrays.asList(inspector, partObjectInspector)); } private List createPartValue(String[] partKeys, Map partSpec) { @@ -202,7 +276,7 @@ public class FetchOperator implements Serializable { private void getNextPath() throws Exception { // first time if (iterPath == null) { - if (work.getTblDir() != null) { + if (work.isNotPartitioned()) { if (!tblDataDone) { currPath = work.getTblDirPath(); currTbl = work.getTblDesc(); @@ -280,9 +354,19 @@ public class FetchOperator implements Serializable { tmp = new PartitionDesc(currTbl, null); } - inputFormat = getInputFormatFromCache(tmp.getInputFileFormatClass(), job); + Class formatter = tmp.getInputFileFormatClass(); + inputFormat = getInputFormatFromCache(formatter, job); Utilities.copyTableJobPropertiesToConf(tmp.getTableDesc(), job); - inputSplits = inputFormat.getSplits(job, 1); + InputSplit[] splits = inputFormat.getSplits(job, 1); + FetchInputFormatSplit[] inputSplits = new FetchInputFormatSplit[splits.length]; + for (int i = 0; i < splits.length; i++) { + inputSplits[i] = new FetchInputFormatSplit(splits[i], formatter.getName()); + } + if (work.getSplitSample() != null) { + inputSplits = splitSampling(work.getSplitSample(), inputSplits); + } + this.inputSplits = inputSplits; + splitNum = 0; serde = tmp.getDeserializerClass().newInstance(); serde.initialize(job, tmp.getProperties()); @@ -307,12 +391,74 @@ public class FetchOperator implements Serializable { return getRecordReader(); } - currRecReader = inputFormat.getRecordReader(inputSplits[splitNum++], job, Reporter.NULL); + final FetchInputFormatSplit target = inputSplits[splitNum]; + + @SuppressWarnings("unchecked") + final RecordReader reader = + inputFormat.getRecordReader(target.getInputSplit(), job, Reporter.NULL); + if (hasVC || work.getSplitSample() != null) { + currRecReader = new HiveRecordReader(reader, job) { + @Override + public boolean doNext(WritableComparable key, Writable value) throws IOException { + // if current pos is larger than shrinkedLength which is calculated for + // each split by table sampling, stop fetching any more (early exit) + if (target.shrinkedLength > 0 && + context.getIoCxt().getCurrentBlockStart() > target.shrinkedLength) { + return false; + } + return 
super.doNext(key, value); + } + }; + ((HiveContextAwareRecordReader)currRecReader). + initIOContext(target, job, inputFormat.getClass(), reader); + } else { + currRecReader = reader; + } + splitNum++; key = currRecReader.createKey(); value = currRecReader.createValue(); return currRecReader; } + private FetchInputFormatSplit[] splitSampling(SplitSample splitSample, + FetchInputFormatSplit[] splits) { + long totalSize = 0; + for (FetchInputFormatSplit split: splits) { + totalSize += split.getLength(); + } + List result = new ArrayList(); + long targetSize = (long) (totalSize * splitSample.getPercent() / 100D); + int startIndex = splitSample.getSeedNum() % splits.length; + long size = 0; + for (int i = 0; i < splits.length; i++) { + FetchInputFormatSplit split = splits[(startIndex + i) % splits.length]; + result.add(split); + long splitgLength = split.getLength(); + if (size + splitgLength >= targetSize) { + if (size + splitgLength > targetSize) { + split.shrinkedLength = targetSize - size; + } + break; + } + size += splitgLength; + } + return result.toArray(new FetchInputFormatSplit[result.size()]); + } + + /** + * Get the next row and push down it to operator tree. + * Currently only used by FetchTask. + **/ + public boolean pushRow() throws IOException, HiveException { + InspectableObject row = getNextRow(); + if (row != null) { + operator.process(row.o, 0); + } + return row != null; + } + + private transient final InspectableObject inspectable = new InspectableObject(); + /** * Get the next row. The fetch context is modified appropriately. * @@ -320,6 +466,9 @@ public class FetchOperator implements Serializable { public InspectableObject getNextRow() throws IOException { try { while (true) { + if (context != null) { + context.resetRow(); + } if (currRecReader == null) { currRecReader = getRecordReader(); if (currRecReader == null) { @@ -329,13 +478,27 @@ public class FetchOperator implements Serializable { boolean ret = currRecReader.next(key, value); if (ret) { - if (this.currPart == null) { - Object obj = serde.deserialize(value); - return new InspectableObject(obj, serde.getObjectInspector()); - } else { - rowWithPart[0] = serde.deserialize(value); - return new InspectableObject(rowWithPart, rowObjectInspector); + if (operator != null && context != null && context.inputFileChanged()) { + // The child operators cleanup if input file has changed + try { + operator.cleanUpInputFileChanged(); + } catch (HiveException e) { + throw new IOException(e); + } + } + if (hasVC) { + vcValues = MapOperator.populateVirtualColumnValues(context, vcCols, vcValues, serde); + row[isPartitioned ? 
2 : 1] = vcValues; } + row[0] = serde.deserialize(value); + if (hasVC || isPartitioned) { + inspectable.o = row; + inspectable.oi = rowObjectInspector; + return inspectable; + } + inspectable.o = row[0]; + inspectable.oi = serde.getObjectInspector(); + return inspectable; } else { currRecReader.close(); currRecReader = null; @@ -356,6 +519,14 @@ public class FetchOperator implements Serializable { currRecReader.close(); currRecReader = null; } + if (operator != null) { + operator.close(false); + operator = null; + } + if (context != null) { + context.clear(); + context = null; + } this.currPath = null; this.iterPath = null; this.iterPartDesc = null; @@ -373,7 +544,7 @@ public class FetchOperator implements Serializable { this.iterPath = iterPath; this.iterPartDesc = iterPartDesc; if (iterPartDesc == null) { - if (work.getTblDir() != null) { + if (work.isNotPartitioned()) { this.currTbl = work.getTblDesc(); } else { // hack, get the first. @@ -388,31 +559,20 @@ public class FetchOperator implements Serializable { */ public ObjectInspector getOutputObjectInspector() throws HiveException { try { - if (work.getTblDir() != null) { - TableDesc tbl = work.getTblDesc(); - Deserializer serde = tbl.getDeserializerClass().newInstance(); - serde.initialize(job, tbl.getProperties()); - return serde.getObjectInspector(); + if (work.isNotPartitioned()) { + return setPrtnDesc(work.getTblDesc()); } - TableDesc tbl; - Map partSpec; List listParts = work.getPartDesc(); if (listParts == null || listParts.isEmpty()) { - tbl = work.getTblDesc(); - partSpec = null; - } else { - currPart = listParts.get(0); - tbl = currPart.getTableDesc(); - partSpec = currPart.getPartSpec(); + return setPrtnDesc(work.getTblDesc(), null); } - serde = tbl.getDeserializerClass().newInstance(); - serde.initialize(job, tbl.getProperties()); - setPrtnDesc(tbl, partSpec); - currPart = null; - return rowObjectInspector; + PartitionDesc currPart = listParts.get(0); + return setPrtnDesc(currPart.getTableDesc(), currPart.getPartSpec()); } catch (Exception e) { throw new HiveException("Failed with exception " + e.getMessage() + org.apache.hadoop.util.StringUtils.stringifyException(e)); + } finally { + currPart = null; } } @@ -440,4 +600,20 @@ public class FetchOperator implements Serializable { } return results.toArray(new FileStatus[results.size()]); } + + // for split sampling. shrinkedLength is checked against IOContext.getCurrentBlockStart, + // which is from RecordReader.getPos(). So some inputformats which does not support getPos() + // like HiveHBaseTableInputFormat cannot be used with this (todo) + private static class FetchInputFormatSplit extends HiveInputFormat.HiveInputSplit { + + // shrinked size for this split. counter part of this in normal mode is + // InputSplitShim.shrinkedLength. + // what's different is that this is evaluated by unit of row using RecordReader.getPos() + // and that is evaluated by unit of split using InputSplt.getLength(). 
+ private long shrinkedLength = -1; + + public FetchInputFormatSplit(InputSplit split, String name) { + super(split, name); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java index f8373a3..c76c37d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java @@ -21,27 +21,24 @@ package org.apache.hadoop.hive.ql.exec; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; -import java.util.Properties; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.DriverContext; import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.api.StageType; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; -import org.apache.hadoop.io.Text; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; /** @@ -51,9 +48,10 @@ public class FetchTask extends Task implements Serializable { private static final long serialVersionUID = 1L; private int maxRows = 100; - private FetchOperator ftOp; - private SerDe mSerde; + private FetchOperator fetch; + private ListSinkOperator sink; private int totalRows; + private static transient final Log LOG = LogFactory.getLog(FetchTask.class); public FetchTask() { @@ -63,28 +61,22 @@ public class FetchTask extends Task implements Serializable { @Override public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) { super.initialize(conf, queryPlan, ctx); + work.initializeForFetch(); try { // Create a file system handle JobConf job = new JobConf(conf, ExecDriver.class); - String serdeName = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEFETCHOUTPUTSERDE); - Class serdeClass = Class.forName(serdeName, true, - JavaUtils.getClassLoader()).asSubclass(SerDe.class); - // cast only needed for Hadoop 0.17 compatibility - mSerde = (SerDe) ReflectionUtils.newInstance(serdeClass, null); - - Properties serdeProp = new Properties(); - - // this is the default serialization format - if (mSerde instanceof DelimitedJSONSerDe) { - serdeProp.put(Constants.SERIALIZATION_FORMAT, "" + Utilities.tabCode); - serdeProp.put(Constants.SERIALIZATION_NULL_FORMAT, work.getSerializationNullFormat()); + Operator source = work.getSource(); + if (source instanceof TableScanOperator) { + TableScanOperator ts = (TableScanOperator) source; + HiveInputFormat.pushFilters(job, ts); + ColumnProjectionUtils.appendReadColumnIDs(job, ts.getNeededColumnIDs()); } + sink = work.getSink(); + fetch = new FetchOperator(work, job, source, getVirtualColumns(source)); + source.initialize(conf, new 
ObjectInspector[]{fetch.getOutputObjectInspector()}); - mSerde.initialize(job, serdeProp); - - ftOp = new FetchOperator(work, job); } catch (Exception e) { // Bail out ungracefully - we should never hit // this here - but would have hit it in SemanticAnalyzer @@ -93,6 +85,13 @@ public class FetchTask extends Task implements Serializable { } } + private List getVirtualColumns(Operator ts) { + if (ts instanceof TableScanOperator && ts.getConf() != null) { + return ((TableScanOperator)ts).getConf().getVirtualCols(); + } + return null; + } + @Override public int execute(DriverContext driverContext) { assert false; @@ -122,48 +121,26 @@ public class FetchTask extends Task implements Serializable { @Override public boolean fetch(ArrayList res) throws IOException, CommandNeedRetryException { + sink.reset(res); try { - int numRows = 0; - int rowsRet = maxRows; - - if (work.getLeastNumRows() > 0) { - if (totalRows == work.getLeastNumRows()) { - return false; - } - for (int i = 0; i < work.getLeastNumRows(); i++) { - InspectableObject io = ftOp.getNextRow(); - if (io == null) { - throw new CommandNeedRetryException(); - } - res.add(((Text) mSerde.serialize(io.o, io.oi)).toString()); - numRows++; - } - totalRows = work.getLeastNumRows(); - return true; - } - - if ((work.getLimit() >= 0) && ((work.getLimit() - totalRows) < rowsRet)) { - rowsRet = work.getLimit() - totalRows; + int rowsRet = work.getLeastNumRows(); + if (rowsRet <= 0) { + rowsRet = work.getLimit() >= 0 ? Math.min(work.getLimit() - totalRows, maxRows) : maxRows; } if (rowsRet <= 0) { - ftOp.clearFetchContext(); + fetch.clearFetchContext(); return false; } - - while (numRows < rowsRet) { - InspectableObject io = ftOp.getNextRow(); - if (io == null) { - if (numRows == 0) { - return false; + boolean fetched = false; + while (sink.getNumRows() < rowsRet) { + if (!fetch.pushRow()) { + if (work.getLeastNumRows() > 0) { + throw new CommandNeedRetryException(); } - totalRows += numRows; - return true; + return fetched; } - - res.add(((Text) mSerde.serialize(io.o, io.oi)).toString()); - numRows++; + fetched = true; } - totalRows += numRows; return true; } catch (CommandNeedRetryException e) { throw e; @@ -171,6 +148,8 @@ public class FetchTask extends Task implements Serializable { throw e; } catch (Exception e) { throw new IOException(e); + } finally { + totalRows += sink.getNumRows(); } } @@ -203,8 +182,8 @@ public class FetchTask extends Task implements Serializable { * @throws HiveException */ public void clearFetch() throws HiveException { - if (null != ftOp) { - ftOp.clearFetchContext(); + if (fetch != null) { + fetch.clearFetchContext(); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java new file mode 100644 index 0000000..9d9a352 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import java.util.ArrayList; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ListSinkDesc; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.util.ReflectionUtils; + +/** + * For fetch task with operator tree, row read from FetchOperator is processed via operator tree + * and finally arrives to this operator. + */ +public class ListSinkOperator extends Operator { + + private transient SerDe mSerde; + + private transient ArrayList res; + private transient int numRows; + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + try { + mSerde = initializeSerde(hconf); + initializeChildren(hconf); + } catch (Exception e) { + throw new HiveException(e); + } + } + + private SerDe initializeSerde(Configuration conf) throws Exception { + String serdeName = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEFETCHOUTPUTSERDE); + Class serdeClass = Class.forName(serdeName, true, + JavaUtils.getClassLoader()).asSubclass(SerDe.class); + // cast only needed for Hadoop 0.17 compatibility + SerDe serde = ReflectionUtils.newInstance(serdeClass, null); + + Properties serdeProp = new Properties(); + + // this is the default serialization format + if (serde instanceof DelimitedJSONSerDe) { + serdeProp.put(Constants.SERIALIZATION_FORMAT, "" + Utilities.tabCode); + serdeProp.put(Constants.SERIALIZATION_NULL_FORMAT, getConf().getSerializationNullFormat()); + } + serde.initialize(conf, serdeProp); + return serde; + } + + public ListSinkOperator initialize(SerDe mSerde) { + this.mSerde = mSerde; + return this; + } + + public void reset(ArrayList res) { + this.res = res; + this.numRows = 0; + } + + public int getNumRows() { + return numRows; + } + + public void processOp(Object row, int tag) throws HiveException { + try { + res.add(mSerde.serialize(row, outputObjInspector).toString()); + numRows++; + } catch (SerDeException e) { + throw new HiveException(e); + } + } + + public OperatorType getType() { + return OperatorType.FORWARD; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index 2553931..dd4571e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -33,7 +33,6 @@ import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.io.IOContext; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import 
org.apache.hadoop.hive.ql.plan.MapredWork; @@ -494,13 +493,16 @@ public class MapOperator extends Operator implements Serializable { // The child operators cleanup if input file has changed cleanUpInputFileChanged(); } + ExecMapperContext context = getExecContext(); Object row = null; try { if (this.hasVC) { this.rowWithPartAndVC[0] = deserializer.deserialize(value); int vcPos = isPartitioned ? 2 : 1; - populateVirtualColumnValues(); + if (context != null) { + populateVirtualColumnValues(context, vcs, vcValues, deserializer); + } this.rowWithPartAndVC[vcPos] = this.vcValues; } else if (!isPartitioned) { row = deserializer.deserialize((Writable) value); @@ -549,54 +551,60 @@ public class MapOperator extends Operator implements Serializable { } } - private void populateVirtualColumnValues() { - if (this.vcs != null) { - ExecMapperContext mapExecCxt = this.getExecContext(); - IOContext ioCxt = mapExecCxt.getIoCxt(); - for (int i = 0; i < vcs.size(); i++) { - VirtualColumn vc = vcs.get(i); - if (vc.equals(VirtualColumn.FILENAME) && mapExecCxt.inputFileChanged()) { - this.vcValues[i] = new Text(mapExecCxt.getCurrentInputFile()); - } else if (vc.equals(VirtualColumn.BLOCKOFFSET)) { - long current = ioCxt.getCurrentBlockStart(); - LongWritable old = (LongWritable) this.vcValues[i]; - if (old == null) { - old = new LongWritable(current); - this.vcValues[i] = old; - continue; - } - if (current != old.get()) { - old.set(current); - } - } else if (vc.equals(VirtualColumn.ROWOFFSET)) { - long current = ioCxt.getCurrentRow(); - LongWritable old = (LongWritable) this.vcValues[i]; - if (old == null) { - old = new LongWritable(current); - this.vcValues[i] = old; - continue; - } - if (current != old.get()) { - old.set(current); - } - } else if (vc.equals(VirtualColumn.RAWDATASIZE)) { - long current = 0L; - SerDeStats stats = this.deserializer.getSerDeStats(); - if(stats != null) { - current = stats.getRawDataSize(); - } - LongWritable old = (LongWritable) this.vcValues[i]; - if (old == null) { - old = new LongWritable(current); - this.vcValues[i] = old; - continue; - } - if (current != old.get()) { - old.set(current); - } + public static Writable[] populateVirtualColumnValues(ExecMapperContext ctx, + List vcs, Writable[] vcValues, Deserializer deserializer) { + if (vcs == null) { + return vcValues; + } + if (vcValues == null) { + vcValues = new Writable[vcs.size()]; + } + for (int i = 0; i < vcs.size(); i++) { + VirtualColumn vc = vcs.get(i); + if (vc.equals(VirtualColumn.FILENAME)) { + if (ctx.inputFileChanged()) { + vcValues[i] = new Text(ctx.getCurrentInputFile()); + } + } else if (vc.equals(VirtualColumn.BLOCKOFFSET)) { + long current = ctx.getIoCxt().getCurrentBlockStart(); + LongWritable old = (LongWritable) vcValues[i]; + if (old == null) { + old = new LongWritable(current); + vcValues[i] = old; + continue; + } + if (current != old.get()) { + old.set(current); + } + } else if (vc.equals(VirtualColumn.ROWOFFSET)) { + long current = ctx.getIoCxt().getCurrentRow(); + LongWritable old = (LongWritable) vcValues[i]; + if (old == null) { + old = new LongWritable(current); + vcValues[i] = old; + continue; + } + if (current != old.get()) { + old.set(current); + } + } else if (vc.equals(VirtualColumn.RAWDATASIZE)) { + long current = 0L; + SerDeStats stats = deserializer.getSerDeStats(); + if(stats != null) { + current = stats.getRawDataSize(); + } + LongWritable old = (LongWritable) vcValues[i]; + if (old == null) { + old = new LongWritable(current); + vcValues[i] = old; + continue; + } + if (current != 
old.get()) { + old.set(current); } } } + return vcValues; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 08362a6..346d938 100755 --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -339,7 +339,7 @@ public class HiveInputFormat return partDesc; } - protected void pushFilters(JobConf jobConf, TableScanOperator tableScan) { + public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) { TableScanDesc scanDesc = tableScan.getConf(); if (scanDesc == null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java index 2e30af8..1ca530c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java @@ -39,6 +39,10 @@ public class IOContext { return IOContext.threadLocal.get(); } + public static void clear() { + IOContext.threadLocal.remove(); + } + long currentBlockStart; long nextBlockStart; long currentRow; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 94a5037..67d3a99 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -77,6 +77,7 @@ public class Optimizer { if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVELIMITOPTENABLE)) { transformations.add(new GlobalLimitOptimizer()); } + transformations.add(new SimpleFetchOptimizer()); // must be called last } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java new file mode 100644 index 0000000..9d12fc3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -0,0 +1,245 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.FetchTask; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.LimitOperator; +import org.apache.hadoop.hive.ql.exec.ListSinkOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.QB; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.SplitSample; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.FetchWork; +import org.apache.hadoop.hive.ql.plan.ListSinkDesc; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.TableDesc; + +/** + * Tries to convert simple fetch query to single fetch task, which fetches rows directly + * from location of table/partition. + */ +public class SimpleFetchOptimizer implements Transform { + + private final Log LOG = LogFactory.getLog(SimpleFetchOptimizer.class.getName()); + + public ParseContext transform(ParseContext pctx) throws SemanticException { + Map> topOps = pctx.getTopOps(); + if (pctx.getQB().isSimpleSelectQuery() && topOps.size() == 1) { + // no join, no groupby, no distinct, no lateral view, no subq, + // no CTAS or insert, not analyze command, and single sourced. 
+ String alias = (String) pctx.getTopOps().keySet().toArray()[0]; + Operator topOp = (Operator) pctx.getTopOps().values().toArray()[0]; + if (topOp instanceof TableScanOperator) { + try { + FetchTask fetchTask = optimize(pctx, alias, (TableScanOperator) topOp); + if (fetchTask != null) { + pctx.setFetchTask(fetchTask); + } + } catch (HiveException e) { + // Has to use full name to make sure it does not conflict with + // org.apache.commons.lang.StringUtils + LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); + if (e instanceof SemanticException) { + throw (SemanticException) e; + } + throw new SemanticException(e.getMessage(), e); + } + } + } + return pctx; + } + + // returns non-null FetchTask instance when succeeded + @SuppressWarnings("unchecked") + private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator source) + throws HiveException { + String mode = HiveConf.getVar( + pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSION); + + boolean aggressive = "more".equals(mode); + FetchData fetch = checkTree(aggressive, pctx, alias, source); + if (fetch != null) { + int limit = pctx.getQB().getParseInfo().getOuterQueryLimit(); + FetchWork fetchWork = fetch.convertToWork(); + FetchTask fetchTask = (FetchTask) TaskFactory.get(fetchWork, pctx.getConf()); + fetchWork.setSink(fetch.completed(pctx, fetchWork)); + fetchWork.setSource(source); + fetchWork.setLimit(limit); + return fetchTask; + } + return null; + } + + // all we can handle is LimitOperator, FilterOperator SelectOperator and final FS + // + // for non-aggressive mode (minimal) + // 1. samping is not allowed + // 2. for partitioned table, all filters should be targeted to partition column + // 3. SelectOperator should be select star + private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, + TableScanOperator ts) throws HiveException { + SplitSample splitSample = pctx.getNameToSplitSample().get(alias); + if (!aggressive && splitSample != null) { + return null; + } + QB qb = pctx.getQB(); + if (!aggressive && qb.hasTableSample(alias)) { + return null; + } + + Table table = qb.getMetaData().getAliasToTable().get(alias); + if (table == null) { + return null; + } + if (!table.isPartitioned()) { + return checkOperators(new FetchData(table, splitSample), ts, aggressive, false); + } + + boolean bypassFilter = false; + if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) { + ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts); + bypassFilter = PartitionPruner.onlyContainsPartnCols(table, pruner); + } + if (aggressive || bypassFilter) { + PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts); + if (aggressive || pruned.getUnknownPartns().isEmpty()) { + bypassFilter &= pruned.getUnknownPartns().isEmpty(); + return checkOperators(new FetchData(pruned, splitSample), ts, aggressive, bypassFilter); + } + } + return null; + } + + private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean aggresive, + boolean bypassFilter) { + if (ts.getChildOperators().size() != 1) { + return null; + } + Operator op = ts.getChildOperators().get(0); + for (; ; op = op.getChildOperators().get(0)) { + if (aggresive) { + if (!(op instanceof LimitOperator || op instanceof FilterOperator + || op instanceof SelectOperator)) { + break; + } + } else if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter) + || (op instanceof SelectOperator && ((SelectOperator) op).getConf().isSelectStar()))) { + break; + } + if 
(op.getChildOperators() == null || op.getChildOperators().size() != 1) { + return null; + } + } + if (op instanceof FileSinkOperator) { + fetch.fileSink = op; + return fetch; + } + return null; + } + + private class FetchData { + + private final Table table; + private final SplitSample splitSample; + private final PrunedPartitionList partsList; + private final HashSet inputs = new HashSet(); + + // this is always non-null when conversion is completed + private Operator fileSink; + + private FetchData(Table table, SplitSample splitSample) { + this.table = table; + this.partsList = null; + this.splitSample = splitSample; + } + + private FetchData(PrunedPartitionList partsList, SplitSample splitSample) { + this.table = null; + this.partsList = partsList; + this.splitSample = splitSample; + } + + private FetchWork convertToWork() throws HiveException { + inputs.clear(); + if (table != null) { + inputs.add(new ReadEntity(table)); + String path = table.getPath().toString(); + FetchWork work = new FetchWork(path, Utilities.getTableDesc(table)); + PlanUtils.configureInputJobPropertiesForStorageHandler(work.getTblDesc()); + work.setSplitSample(splitSample); + return work; + } + List listP = new ArrayList(); + List partP = new ArrayList(); + + for (Partition partition : partsList.getNotDeniedPartns()) { + inputs.add(new ReadEntity(partition)); + listP.add(partition.getPartitionPath().toString()); + partP.add(Utilities.getPartitionDesc(partition)); + } + TableDesc table = Utilities.getTableDesc(partsList.getSourceTable()); + FetchWork work = new FetchWork(listP, partP, table); + if (!work.getPartDesc().isEmpty()) { + PartitionDesc part0 = work.getPartDesc().get(0); + PlanUtils.configureInputJobPropertiesForStorageHandler(part0.getTableDesc()); + work.setSplitSample(splitSample); + } + return work; + } + + // this optimizer is for replacing FS to temp+fetching from temp with + // single direct fetching, which means FS is not needed any more when conversion completed. + // rows forwarded will be received by ListSinkOperator, which is replacing FS + private ListSinkOperator completed(ParseContext pctx, FetchWork work) { + pctx.getSemanticInputs().addAll(inputs); + ListSinkOperator sink = new ListSinkOperator(); + sink.setConf(new ListSinkDesc(work.getSerializationNullFormat())); + sink.setParentOperators(new ArrayList>()); + Operator parent = fileSink.getParentOperators().get(0); + sink.getParentOperators().add(parent); + parent.replaceChild(fileSink, sink); + fileSink.setParentOperators(null); + return sink; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java index 9e67a85..8d7bac2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java @@ -21,7 +21,7 @@ package org.apache.hadoop.hive.ql.parse; import org.apache.hadoop.hive.ql.plan.LimitDesc; /** - * + * context for pruning inputs. 
populated by GlobalLimitOptimizer */ public class GlobalLimitCtx { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index ea5e3de..0bdc818 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -29,6 +29,7 @@ import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; +import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; @@ -37,7 +38,9 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.hooks.LineageInfo; import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; @@ -100,6 +103,8 @@ public class ParseContext { private HashSet semanticInputs; private List> rootTasks; + private FetchTask fetchTask; + public ParseContext() { } @@ -533,4 +538,23 @@ public class ParseContext { this.rootTasks.remove(rootTask); this.rootTasks.addAll(tasks); } + + public FetchTask getFetchTask() { + return fetchTask; + } + + public void setFetchTask(FetchTask fetchTask) { + this.fetchTask = fetchTask; + } + + public PrunedPartitionList getPrunedPartitions(String alias, TableScanOperator ts) + throws HiveException { + PrunedPartitionList partsList = opToPartList.get(ts); + if (partsList == null) { + partsList = PartitionPruner.prune(topToTable.get(ts), + opToPartPruner.get(ts), conf, alias, prunedPartitions); + opToPartList.put(ts, partsList); + } + return partsList; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java index cefc274..bfc9835 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java @@ -92,8 +92,8 @@ public class PrunedPartitionList { */ public List getNotDeniedPartns() { List partitions = new ArrayList(); - partitions.addAll(unknownPartns); partitions.addAll(confirmedPartns); + partitions.addAll(unknownPartns); return partitions; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index fa4f71d..413a04d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -183,8 +183,13 @@ public class QB { return isQuery; } - public boolean isSelectStarQuery() { - return qbp.isSelectStarQuery() && aliasToSubq.isEmpty() && !isCTAS() && !qbp.isAnalyzeCommand(); + public boolean isSimpleSelectQuery() { + return qbp.isSimpleSelectQuery() && aliasToSubq.isEmpty() && !isCTAS() && + !qbp.isAnalyzeCommand(); + } + + public boolean hasTableSample(String alias) { + return qbp.getTabSample(alias) != null; } public CreateTableDesc getTableDesc() { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java 
ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index f1b749d..5624bef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -376,8 +376,8 @@ public class QBParseInfo { this.outerQueryLimit = outerQueryLimit; } - public boolean isSelectStarQuery() { - if (isSubQ || (joinExpr != null) || (!nameToSample.isEmpty()) + public boolean isSimpleSelectQuery() { + if (isSubQ || (joinExpr != null) || (!destToGroupby.isEmpty()) || (!destToClusterby.isEmpty()) || (!aliasToLateralViews.isEmpty())) { return false; @@ -413,23 +413,6 @@ public class QBParseInfo { } } - iter = destToSelExpr.entrySet().iterator(); - while (iter.hasNext()) { - Map.Entry entry = iter.next(); - ASTNode selExprList = entry.getValue(); - // Iterate over the selects - for (int i = 0; i < selExprList.getChildCount(); ++i) { - - // list of the columns - ASTNode selExpr = (ASTNode) selExprList.getChild(i); - ASTNode sel = (ASTNode) selExpr.getChild(0); - - if (sel.getToken().getType() != HiveParser.TOK_ALLCOLREF) { - return false; - } - } - } - return true; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index a53c7d7..562d437 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -110,7 +110,6 @@ import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory; import org.apache.hadoop.hive.ql.optimizer.Optimizer; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer; -import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType; import org.apache.hadoop.hive.ql.plan.AggregationDesc; @@ -142,7 +141,6 @@ import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ScriptDesc; @@ -284,6 +282,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { qb = pctx.getQB(); groupOpToInputTables = pctx.getGroupOpToInputTables(); prunedPartitions = pctx.getPrunedPartitions(); + fetchTask = pctx.getFetchTask(); setLineageInfo(pctx.getLineageInfo()); } @@ -6898,95 +6897,16 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } @SuppressWarnings("nls") - private void genMapRedTasks(QB qb) throws SemanticException { - FetchWork fetch = null; - List> mvTask = new ArrayList>(); - FetchTask fetchTask = null; - - QBParseInfo qbParseInfo = qb.getParseInfo(); - - // Does this query need reduce job - if (qb.isSelectStarQuery() && qbParseInfo.getDestToClusterBy().isEmpty() - && qbParseInfo.getDestToDistributeBy().isEmpty() - && qbParseInfo.getDestToOrderBy().isEmpty() - && qbParseInfo.getDestToSortBy().isEmpty()) { - boolean noMapRed = false; - - Iterator> iter = qb.getMetaData() - .getAliasToTable().entrySet().iterator(); - Table tab = (iter.next()).getValue(); - if (!tab.isPartitioned()) { - if (qbParseInfo.getDestToWhereExpr().isEmpty()) { - fetch = new FetchWork(tab.getPath().toString(), 
Utilities - .getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit()); - noMapRed = true; - inputs.add(new ReadEntity(tab)); - } - } else { - - if (topOps.size() == 1) { - TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0]; - - // check if the pruner only contains partition columns - if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts), - opToPartPruner.get(ts))) { - - PrunedPartitionList partsList = null; - try { - partsList = opToPartList.get(ts); - if (partsList == null) { - partsList = PartitionPruner.prune(topToTable.get(ts), - opToPartPruner.get(ts), conf, (String) topOps.keySet() - .toArray()[0], prunedPartitions); - opToPartList.put(ts, partsList); - } - } catch (HiveException e) { - // Has to use full name to make sure it does not conflict with - // org.apache.commons.lang.StringUtils - LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); - throw new SemanticException(e.getMessage(), e); - } - - // If there is any unknown partition, create a map-reduce job for - // the filter to prune correctly - if ((partsList.getUnknownPartns().size() == 0)) { - List listP = new ArrayList(); - List partP = new ArrayList(); - - Set parts = partsList.getConfirmedPartns(); - Iterator iterParts = parts.iterator(); - while (iterParts.hasNext()) { - Partition part = iterParts.next(); - - listP.add(part.getPartitionPath().toString()); - try { - partP.add(Utilities.getPartitionDesc(part)); - } catch (HiveException e) { - throw new SemanticException(e.getMessage(), e); - } - inputs.add(new ReadEntity(part)); - } - - TableDesc table = Utilities.getTableDesc(partsList.getSourceTable()); - fetch = new FetchWork(listP, partP, table, qb.getParseInfo() - .getOuterQueryLimit()); - noMapRed = true; - } - } - } - } - - if (noMapRed) { - PlanUtils.configureInputJobPropertiesForStorageHandler(fetch.getTblDesc()); - fetchTask = (FetchTask) TaskFactory.get(fetch, conf); - setFetchTask(fetchTask); - - // remove root tasks if any - rootTasks.clear(); - return; - } + private void genMapRedTasks(ParseContext pCtx) throws SemanticException { + if (pCtx.getFetchTask() != null) { + // replaced by single fetch task + init(pCtx); + return; } + init(pCtx); + List> mvTask = new ArrayList>(); + // In case of a select, use a fetch task instead of a move task if (qb.getIsQuery()) { if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) { @@ -6998,10 +6918,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT); TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat); - fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(), + FetchWork fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(), resultTab, qb.getParseInfo().getOuterQueryLimit()); - fetchTask = (FetchTask) TaskFactory.get(fetch, conf); + FetchTask fetchTask = (FetchTask) TaskFactory.get(fetch, conf); setFetchTask(fetchTask); // For the FetchTask, the limit optimiztion requires we fetch all the rows @@ -7425,12 +7345,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { optm.setPctx(pCtx); optm.initialize(conf); pCtx = optm.optimize(); - init(pCtx); - qb = pCtx.getQB(); // At this point we have the complete operator tree // from which we want to find the reduce operator - genMapRedTasks(qb); + genMapRedTasks(pCtx); LOG.info("Completed plan generation"); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java 
ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java index e513958..313e772 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java @@ -23,6 +23,9 @@ import java.util.ArrayList; import java.util.List; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ListSinkOperator; +import org.apache.hadoop.hive.ql.parse.SplitSample; /** * FetchWork. @@ -38,9 +41,14 @@ public class FetchWork implements Serializable { private ArrayList partDir; private ArrayList partDesc; + private Operator source; + private ListSinkOperator sink; + private int limit; private int leastNumRows; + private SplitSample splitSample; + /** * Serialization Null Format for the serde used to fetch data. */ @@ -71,6 +79,14 @@ public class FetchWork implements Serializable { this.limit = limit; } + public void initializeForFetch() { + if (source == null) { + sink = new ListSinkOperator(); + sink.setConf(new ListSinkDesc(serializationNullFormat)); + source = sink; + } + } + public String getSerializationNullFormat() { return serializationNullFormat; } @@ -79,6 +95,14 @@ public class FetchWork implements Serializable { serializationNullFormat = format; } + public boolean isNotPartitioned() { + return tblDir != null; + } + + public boolean isPartitioned() { + return tblDir == null; + } + /** * @return the tblDir */ @@ -200,6 +224,31 @@ public class FetchWork implements Serializable { this.leastNumRows = leastNumRows; } + @Explain(displayName = "Processor Tree") + public Operator getSource() { + return source; + } + + public void setSource(Operator source) { + this.source = source; + } + + public ListSinkOperator getSink() { + return sink; + } + + public void setSink(ListSinkOperator sink) { + this.sink = sink; + } + + public void setSplitSample(SplitSample splitSample) { + this.splitSample = splitSample; + } + + public SplitSample getSplitSample() { + return splitSample; + } + @Override public String toString() { if (tblDir != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ListSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ListSinkDesc.java new file mode 100644 index 0000000..8ae3afb --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ListSinkDesc.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; + +/** + * description for ListSinkOperator, just for explain result. 
+ */ +@Explain(displayName = "ListSink") +public class ListSinkDesc implements Serializable { + + private static final long serialVersionUID = 1L; + + private String serializationNullFormat = "NULL"; + + public ListSinkDesc() { + } + + public ListSinkDesc(String serializationNullFormat) { + this.serializationNullFormat = serializationNullFormat; + } + + public String getSerializationNullFormat() { + return serializationNullFormat; + } + + public void setSerializationNullFormat(String serializationNullFormat) { + this.serializationNullFormat = serializationNullFormat; + } +} diff --git ql/src/test/queries/clientpositive/nonmr_fetch.q ql/src/test/queries/clientpositive/nonmr_fetch.q new file mode 100644 index 0000000..e961e93 --- /dev/null +++ ql/src/test/queries/clientpositive/nonmr_fetch.q @@ -0,0 +1,83 @@ +set hive.fetch.task.conversion=minimal; + +-- backward compatible (minimal) +explain select * from src limit 10; +select * from src limit 10; + +explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; +select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; + +-- negative, select expression +explain select key from src limit 10; +select key from src limit 10; + +-- negative, filter on non-partition column +explain select * from srcpart where key > 100 limit 10; +select * from srcpart where key > 100 limit 10; + +-- negative, table sampling +explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10; +select * from src TABLESAMPLE (0.25 PERCENT) limit 10; + +set hive.fetch.task.conversion=more; + +-- backward compatible (more) +explain select * from src limit 10; +select * from src limit 10; + +explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; +select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; + +-- select expression +explain select cast(key as int) * 10, upper(value) from src limit 10; +select cast(key as int) * 10, upper(value) from src limit 10; + +-- filter on non-partition column +explain select key from src where key < 100 limit 10; +select key from src where key < 100 limit 10; + +-- select expr for partitioned table +explain select key from srcpart where ds='2008-04-08' AND hr='11' limit 10; +select key from srcpart where ds='2008-04-08' AND hr='11' limit 10; + +-- virtual columns +explain select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 10 limit 10; +select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 100 limit 10; + +-- virtual columns on partitioned table +explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30; +select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30; + +-- bucket sampling +explain select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key); +select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key); +explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key); +select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key); + +-- split sampling +explain select * from src TABLESAMPLE (0.25 PERCENT); +select * from src TABLESAMPLE (0.25 PERCENT); +explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT); +select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT); + +-- non deterministic func +explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1; +select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart 
where ds="2008-04-09" AND rand() > 1; + +-- negative, groupby +explain select key, count(value) from src group by key; + +-- negative, distinct +explain select distinct key, value from src; + +-- negative, CTAS +explain create table srcx as select distinct key, value from src; + +-- negative, analyze +explain analyze table src compute statistics; + +-- negative, subq +explain select a.* from (select * from src) a; + +-- negative, join +explain select * from src join src src2 on src.key=src2.key; diff --git ql/src/test/results/clientpositive/input0.q.out ql/src/test/results/clientpositive/input0.q.out index 7822452..dc02196 100644 --- ql/src/test/results/clientpositive/input0.q.out +++ ql/src/test/results/clientpositive/input0.q.out @@ -14,6 +14,17 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 + Processor Tree: + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + ListSink PREHOOK: query: SELECT * FROM src diff --git ql/src/test/results/clientpositive/input_limit.q.out ql/src/test/results/clientpositive/input_limit.q.out index 1fc627e..24a4a23 100644 --- ql/src/test/results/clientpositive/input_limit.q.out +++ ql/src/test/results/clientpositive/input_limit.q.out @@ -14,6 +14,18 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: 20 + Processor Tree: + TableScan + alias: x + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Limit + ListSink PREHOOK: query: SELECT x.* FROM SRC x LIMIT 20 diff --git ql/src/test/results/clientpositive/input_part0.q.out ql/src/test/results/clientpositive/input_part0.q.out index 38cbd64..0d42436 100644 --- ql/src/test/results/clientpositive/input_part0.q.out +++ ql/src/test/results/clientpositive/input_part0.q.out @@ -14,6 +14,21 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 + Processor Tree: + TableScan + alias: x + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink PREHOOK: query: SELECT x.* FROM SRCPART x WHERE x.ds = '2008-04-08' diff --git ql/src/test/results/clientpositive/input_part3.q.out ql/src/test/results/clientpositive/input_part3.q.out index af92fa7..b54d01b 100644 --- ql/src/test/results/clientpositive/input_part3.q.out +++ ql/src/test/results/clientpositive/input_part3.q.out @@ -14,6 +14,21 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 + Processor Tree: + TableScan + alias: x + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink PREHOOK: query: SELECT x.* FROM SRCPART x WHERE x.ds = '2008-04-08' and x.hr = 11 diff --git ql/src/test/results/clientpositive/input_part4.q.out ql/src/test/results/clientpositive/input_part4.q.out index 9a1de46..87fd523 100644 --- ql/src/test/results/clientpositive/input_part4.q.out +++ ql/src/test/results/clientpositive/input_part4.q.out @@ -14,6 +14,25 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 + Processor Tree: + TableScan + alias: x + Filter Operator + predicate: + expr: ((ds = '2008-04-08') and (hr = 15.0)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink 
PREHOOK: query: SELECT x.* FROM SRCPART x WHERE x.ds = '2008-04-08' and x.hr = 15 diff --git ql/src/test/results/clientpositive/input_part8.q.out ql/src/test/results/clientpositive/input_part8.q.out index 59e09c9..8d01562 100644 --- ql/src/test/results/clientpositive/input_part8.q.out +++ ql/src/test/results/clientpositive/input_part8.q.out @@ -14,6 +14,22 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: 10 + Processor Tree: + TableScan + alias: x + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Limit + ListSink PREHOOK: query: SELECT x.* FROM SRCPART x WHERE ds = '2008-04-08' LIMIT 10 diff --git ql/src/test/results/clientpositive/nonmr_fetch.q.out ql/src/test/results/clientpositive/nonmr_fetch.q.out new file mode 100644 index 0000000..ba87df5 --- /dev/null +++ ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -0,0 +1,1342 @@ +PREHOOK: query: -- backward compatible (minimal) +explain select * from src limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- backward compatible (minimal) +explain select * from src limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Limit + ListSink + + +PREHOOK: query: select * from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +409 val_409 +255 val_255 +278 val_278 +98 val_98 +484 val_484 +PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: srcpart + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Limit + ListSink + + +PREHOOK: query: select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +238 val_238 2008-04-08 11 +86 val_86 2008-04-08 11 +311 val_311 2008-04-08 11 +27 val_27 2008-04-08 11 +165 val_165 2008-04-08 11 +409 val_409 2008-04-08 11 +255 val_255 2008-04-08 11 +278 val_278 
2008-04-08 11 +98 val_98 2008-04-08 11 +484 val_484 2008-04-08 11 +PREHOOK: query: -- negative, select expression +explain select key from src limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- negative, select expression +explain select key from src limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 10 + + +PREHOOK: query: select key from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +86 +311 +27 +165 +409 +255 +278 +98 +484 +PREHOOK: query: -- negative, filter on non-partition column +explain select * from srcpart where key > 100 limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- negative, filter on non-partition column +explain select * from srcpart where key > 100 limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + srcpart + TableScan + alias: srcpart + Filter Operator + predicate: + expr: (key > 100.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 10 + + +PREHOOK: query: select * from srcpart where key > 100 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where key > 100 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +238 val_238 2008-04-08 11 +311 val_311 2008-04-08 11 +165 val_165 2008-04-08 11 +409 val_409 2008-04-08 11 +255 val_255 2008-04-08 11 +278 val_278 2008-04-08 11 +484 val_484 2008-04-08 11 +265 val_265 2008-04-08 11 +193 val_193 2008-04-08 11 +401 val_401 2008-04-08 11 +PREHOOK: query: -- negative, table sampling 
+explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- negative, table sampling +explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Percentage Sample: + src + percentage: 0.25 + seed number: 0 + + Stage: Stage-0 + Fetch Operator + limit: 10 + + +PREHOOK: query: select * from src TABLESAMPLE (0.25 PERCENT) limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src TABLESAMPLE (0.25 PERCENT) limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +409 val_409 +255 val_255 +278 val_278 +98 val_98 +484 val_484 +PREHOOK: query: -- backward compatible (more) +explain select * from src limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- backward compatible (more) +explain select * from src limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Limit + ListSink + + +PREHOOK: query: select * from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +409 val_409 +255 val_255 +278 val_278 +98 val_98 +484 val_484 +PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: srcpart + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Limit + ListSink + + +PREHOOK: query: select * from srcpart where ds='2008-04-08' AND hr='11' 
limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +238 val_238 2008-04-08 11 +86 val_86 2008-04-08 11 +311 val_311 2008-04-08 11 +27 val_27 2008-04-08 11 +165 val_165 2008-04-08 11 +409 val_409 2008-04-08 11 +255 val_255 2008-04-08 11 +278 val_278 2008-04-08 11 +98 val_98 2008-04-08 11 +484 val_484 2008-04-08 11 +PREHOOK: query: -- select expression +explain select cast(key as int) * 10, upper(value) from src limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- select expression +explain select cast(key as int) * 10, upper(value) from src limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (* (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)) 10)) (TOK_SELEXPR (TOK_FUNCTION upper (TOK_TABLE_OR_COL value)))) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: src + Select Operator + expressions: + expr: (UDFToInteger(key) * 10) + type: int + expr: upper(value) + type: string + outputColumnNames: _col0, _col1 + Limit + ListSink + + +PREHOOK: query: select cast(key as int) * 10, upper(value) from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select cast(key as int) * 10, upper(value) from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2380 VAL_238 +860 VAL_86 +3110 VAL_311 +270 VAL_27 +1650 VAL_165 +4090 VAL_409 +2550 VAL_255 +2780 VAL_278 +980 VAL_98 +4840 VAL_484 +PREHOOK: query: -- filter on non-partition column +explain select key from src where key < 100 limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- filter on non-partition column +explain select key from src where key < 100 limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: src + Filter Operator + predicate: + expr: (key < 100.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Limit + ListSink + + +PREHOOK: query: select key from src where key < 100 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from src where key < 100 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +86 +27 +98 +66 +37 +15 +82 +17 +0 +57 +PREHOOK: query: -- select expr for partitioned table +explain select key from srcpart where ds='2008-04-08' AND hr='11' limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- select expr for partitioned table +explain select key from srcpart where ds='2008-04-08' AND hr='11' limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT 
(TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: srcpart + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Limit + ListSink + + +PREHOOK: query: select key from srcpart where ds='2008-04-08' AND hr='11' limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select key from srcpart where ds='2008-04-08' AND hr='11' limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +238 +86 +311 +27 +165 +409 +255 +278 +98 +484 +PREHOOK: query: -- virtual columns +explain select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 10 limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- virtual columns +explain select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 10 limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: src + Filter Operator + predicate: + expr: (key < 10.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: BLOCK__OFFSET__INSIDE__FILE + type: bigint + outputColumnNames: _col0, _col1, _col2 + Limit + ListSink + + +PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 100 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 100 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +86 val_86 12 +27 val_27 34 +98 val_98 92 +66 val_66 198 +37 val_37 328 +15 val_15 386 +82 val_82 396 +17 val_17 910 +0 val_0 968 +57 val_57 1024 +PREHOOK: query: -- virtual columns on partitioned table +explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30 +PREHOOK: type: QUERY +POSTHOOK: query: -- virtual columns on partitioned table +explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_LIMIT 30))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 30 + Processor Tree: + TableScan + alias: srcpart + Filter Operator + predicate: + expr: (key < 10.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + expr: BLOCK__OFFSET__INSIDE__FILE + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Limit + ListSink + + +PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 
limit 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 val_0 2008-04-08 11 968 +4 val_4 2008-04-08 11 1218 +8 val_8 2008-04-08 11 1916 +0 val_0 2008-04-08 11 2088 +0 val_0 2008-04-08 11 2632 +5 val_5 2008-04-08 11 3060 +5 val_5 2008-04-08 11 3864 +2 val_2 2008-04-08 11 4004 +5 val_5 2008-04-08 11 4540 +9 val_9 2008-04-08 11 5398 +0 val_0 2008-04-08 12 968 +4 val_4 2008-04-08 12 1218 +8 val_8 2008-04-08 12 1916 +0 val_0 2008-04-08 12 2088 +0 val_0 2008-04-08 12 2632 +5 val_5 2008-04-08 12 3060 +5 val_5 2008-04-08 12 3864 +2 val_2 2008-04-08 12 4004 +5 val_5 2008-04-08 12 4540 +9 val_9 2008-04-08 12 5398 +0 val_0 2008-04-09 11 968 +4 val_4 2008-04-09 11 1218 +8 val_8 2008-04-09 11 1916 +0 val_0 2008-04-09 11 2088 +0 val_0 2008-04-09 11 2632 +5 val_5 2008-04-09 11 3060 +5 val_5 2008-04-09 11 3864 +2 val_2 2008-04-09 11 4004 +5 val_5 2008-04-09 11 4540 +9 val_9 2008-04-09 11 5398 +PREHOOK: query: -- bucket sampling +explain select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) +PREHOOK: type: QUERY +POSTHOOK: query: -- bucket sampling +explain select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLEBUCKETSAMPLE 1 40 (TOK_TABLE_OR_COL key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + Filter Operator + predicate: + expr: (((hash(key) & 2147483647) % 40) = 0) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: BLOCK__OFFSET__INSIDE__FILE + type: bigint + outputColumnNames: _col0, _col1, _col2 + ListSink + + +PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +484 val_484 102 +286 val_286 1404 +187 val_187 1416 +187 val_187 2492 +77 val_77 2622 +187 val_187 4516 +448 val_448 5636 +PREHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) (TOK_TABLEBUCKETSAMPLE 1 40 (TOK_TABLE_OR_COL key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR 
(TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: srcpart + Filter Operator + predicate: + expr: (((hash(key) & 2147483647) % 40) = 0) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + expr: BLOCK__OFFSET__INSIDE__FILE + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + ListSink + + +PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +484 val_484 2008-04-08 11 102 +286 val_286 2008-04-08 11 1404 +187 val_187 2008-04-08 11 1416 +187 val_187 2008-04-08 11 2492 +77 val_77 2008-04-08 11 2622 +187 val_187 2008-04-08 11 4516 +448 val_448 2008-04-08 11 5636 +484 val_484 2008-04-08 12 102 +286 val_286 2008-04-08 12 1404 +187 val_187 2008-04-08 12 1416 +187 val_187 2008-04-08 12 2492 +77 val_77 2008-04-08 12 2622 +187 val_187 2008-04-08 12 4516 +448 val_448 2008-04-08 12 5636 +484 val_484 2008-04-09 11 102 +286 val_286 2008-04-09 11 1404 +187 val_187 2008-04-09 11 1416 +187 val_187 2008-04-09 11 2492 +77 val_77 2008-04-09 11 2622 +187 val_187 2008-04-09 11 4516 +448 val_448 2008-04-09 11 5636 +484 val_484 2008-04-09 12 102 +286 val_286 2008-04-09 12 1404 +187 val_187 2008-04-09 12 1416 +187 val_187 2008-04-09 12 2492 +77 val_77 2008-04-09 12 2622 +187 val_187 2008-04-09 12 4516 +448 val_448 2008-04-09 12 5636 +PREHOOK: query: -- split sampling +explain select * from src TABLESAMPLE (0.25 PERCENT) +PREHOOK: type: QUERY +POSTHOOK: query: -- split sampling +explain select * from src TABLESAMPLE (0.25 PERCENT) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + ListSink + + +PREHOOK: query: select * from src TABLESAMPLE (0.25 PERCENT) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src TABLESAMPLE (0.25 PERCENT) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 +86 val_86 +PREHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT) +PREHOOK: type: QUERY +POSTHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM 
(TOK_TABREF (TOK_TABNAME srcpart) (TOK_TABLESPLITSAMPLE 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: srcpart + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + expr: BLOCK__OFFSET__INSIDE__FILE + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + ListSink + + +PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +238 val_238 2008-04-08 11 0 +86 val_86 2008-04-08 11 12 +238 val_238 2008-04-08 12 0 +86 val_86 2008-04-08 12 12 +238 val_238 2008-04-09 11 0 +86 val_86 2008-04-09 11 12 +238 val_238 2008-04-09 12 0 +86 val_86 2008-04-09 12 12 +PREHOOK: query: -- non deterministic func +explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- non deterministic func +explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) "2008-04-09") (> (TOK_FUNCTION rand) 1))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: srcpart + Filter Operator + predicate: + expr: (rand() > 1) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: BLOCK__OFFSET__INSIDE__FILE + type: bigint + outputColumnNames: _col0, _col1, _col2 + ListSink + + +PREHOOK: query: select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: -- negative, groupby +explain select key, count(value) from src group by key +PREHOOK: type: QUERY +POSTHOOK: query: -- negative, groupby +explain select key, count(value) from src group by key +POSTHOOK: type: QUERY +ABSTRACT 
SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(value) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- negative, distinct +explain select distinct key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: -- negative, distinct +explain select distinct key, value from src +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- negative, CTAS +explain create table srcx as select distinct key, value from src +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: -- negative, CTAS +explain create table srcx as select distinct key, value from src +POSTHOOK: type: CREATETABLE_AS_SELECT +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME 
srcx) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + name: default.srcx + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-3 + Create Table Operator: + Create Table + columns: key string, value string + if not exists: false + input format: org.apache.hadoop.mapred.TextInputFormat + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: srcx + isExternal: false + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: -- negative, analyze +explain analyze table src compute statistics +PREHOOK: type: QUERY +POSTHOOK: query: -- negative, analyze +explain analyze table src compute statistics +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_ANALYZE (TOK_TAB (TOK_TABNAME src))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + + Stage: Stage-1 + Stats-Aggr Operator + + +PREHOOK: query: -- negative, subq +explain select a.* from (select * from src) a +PREHOOK: type: QUERY +POSTHOOK: query: -- negative, subq +explain select a.* from (select * from src) a +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a:src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- negative, join +explain select * from src join src src2 on src.key=src2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- negative, join +explain select * from src join src src2 on src.key=src2.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src) src2) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + src2 + TableScan + alias: src2 + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/ppr_pushdown3.q.out ql/src/test/results/clientpositive/ppr_pushdown3.q.out index 038fa5b..fa91f75 100644 --- ql/src/test/results/clientpositive/ppr_pushdown3.q.out +++ ql/src/test/results/clientpositive/ppr_pushdown3.q.out @@ -111,6 +111,21 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 + Processor Tree: + TableScan + alias: srcpart + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink PREHOOK: query: select * from srcpart diff --git ql/src/test/results/clientpositive/regex_col.q.out ql/src/test/results/clientpositive/regex_col.q.out index b0f8bf2..d5d74b6 100644 --- ql/src/test/results/clientpositive/regex_col.q.out +++ ql/src/test/results/clientpositive/regex_col.q.out @@ -14,6 +14,21 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator limit: -1 + Processor Tree: + TableScan + alias: srcpart + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + expr: ds + type: string + expr: hr + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink PREHOOK: query: EXPLAIN diff --git ql/src/test/results/clientpositive/source.q.out ql/src/test/results/clientpositive/source.q.out index 7b51480..ae2449e 100644 --- ql/src/test/results/clientpositive/source.q.out +++ ql/src/test/results/clientpositive/source.q.out @@ -14,6 +14,17 @@ STAGE 
PLANS: Stage: Stage-0 Fetch Operator limit: -1 + Processor Tree: + TableScan + alias: x + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + ListSink PREHOOK: query: SELECT x.* FROM SRC x diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java index b21755e..c798fe6 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java @@ -290,6 +290,17 @@ public final class ObjectInspectorFactory { return result; } + public static UnionStructObjectInspector repackUnionStructObjectInspector( + UnionStructObjectInspector unionInspector, List fieldsInspector) { + if (unionInspector == null) { + return getUnionStructObjectInspector(fieldsInspector); + } + cachedUnionStructObjectInspector.remove(fieldsInspector); + unionInspector.init(fieldsInspector); + cachedUnionStructObjectInspector.put(fieldsInspector, unionInspector); + return unionInspector; + } + static HashMap, ColumnarStructObjectInspector> cachedColumnarStructObjectInspector = new HashMap, ColumnarStructObjectInspector>(); public static ColumnarStructObjectInspector getColumnarStructObjectInspector(
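
The golden outputs added throughout this patch all reflect the same planner change: when hive.fetch.task.conversion permits it, the query keeps only a Stage-0 Fetch Operator whose new "Processor Tree" ends in ListSink, and the Stage-1 Map Reduce stage disappears. As an illustrative sketch only (not part of the patch), the snippet below mirrors nonmr_fetch.q and shows how the toggle can be observed from a session; it assumes the standard src test table from the test warehouse, and the behavior noted in the comments is the one recorded in the expected outputs above.

-- illustrative sketch, not part of the patch: compare plans under the two settings
set hive.fetch.task.conversion=more;
-- per the expected output above, this plan is a single Stage-0 Fetch Operator
-- whose Processor Tree ends in ListSink (no Map Reduce stage)
explain select key from src where key < 100 limit 10;

set hive.fetch.task.conversion=minimal;
-- per the expected output above, a bare column projection is not converted under
-- 'minimal', so the plan keeps a Stage-1 Map Reduce stage before the Fetch Operator
explain select key from src limit 10;

The same comparison can be run against any of the negative cases in nonmr_fetch.q (group by, distinct, subqueries, joins), which stay on the Map Reduce path under both settings in the outputs above.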