diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 367f056..e98c24f 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -546,6 +546,9 @@ public class HiveConf extends Configuration {
HIVEOUTERJOINSUPPORTSFILTERS("hive.outerjoin.supports.filters", true),
+ // 'minimal', 'more' (and 'all' later)
+ HIVEFETCHTASKCONVERSION("hive.fetch.task.conversion", "minimal"),
+
// Serde for FetchTask
HIVEFETCHOUTPUTSERDE("hive.fetch.output.serde", "org.apache.hadoop.hive.serde2.DelimitedJSONSerDe"),
diff --git conf/hive-default.xml.template conf/hive-default.xml.template
index 150bbeb..b2b3342 100644
--- conf/hive-default.xml.template
+++ conf/hive-default.xml.template
@@ -1389,5 +1389,17 @@
+<property>
+  <name>hive.fetch.task.conversion</name>
+  <value>minimal</value>
+  <description>
+    Some select queries can be converted to a single FETCH task, minimizing latency.
+    Currently the query should be single sourced, with no subqueries, and should not have
+    any aggregations or distincts (which incur RS), lateral views or joins.
+    1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only
+    2. more    : SELECT, FILTER, LIMIT only (+TABLESAMPLE, virtual columns)
+  </description>
+</property>
+
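
For reference, a minimal sketch of how the new setting can be read and overridden programmatically. It assumes only the ConfVars entry added above and the plain Hadoop Configuration API; the class name is made up, and the mode strings mirror the description ('minimal' is the default, 'more' enables the wider conversion):

import org.apache.hadoop.hive.conf.HiveConf;

public class FetchConversionModeExample {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // default: only SELECT *, partition-column filters and LIMIT are converted
    System.out.println(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEFETCHTASKCONVERSION));

    // opt into the wider conversion (any SELECT/FILTER/LIMIT, TABLESAMPLE, virtual columns)
    conf.set("hive.fetch.task.conversion", "more");
    System.out.println(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEFETCHTASKCONVERSION));
  }
}
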
diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 567aafa..c4421ba 100644
--- ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -442,13 +442,6 @@ public class Driver implements CommandProcessor {
sem.validate();
plan = new QueryPlan(command, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN));
- // initialize FetchTask right here
- if (plan.getFetchTask() != null) {
- plan.getFetchTask().initialize(conf, plan, null);
- }
-
- // get the output schema
- schema = getSchema(sem, conf);
// test Only - serialize the query plan and deserialize it
if ("true".equalsIgnoreCase(System.getProperty("test.serialize.qplan"))) {
@@ -477,6 +470,9 @@ public class Driver implements CommandProcessor {
plan.getFetchTask().initialize(conf, plan, null);
}
+ // get the output schema
+ schema = getSchema(sem, conf);
+
//do the authorization check
if (HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java
index a633314..c66cf0c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java
@@ -68,7 +68,10 @@ public class ExecMapperContext {
ioCxt = IOContext.get();
}
-
+ public void clear() {
+ IOContext.clear();
+ ioCxt = null;
+ }
/**
* For CompbineFileInputFormat, the mapper's input file will be changed on the
@@ -174,5 +177,4 @@ public class ExecMapperContext {
public void setIoCxt(IOContext ioCxt) {
this.ioCxt = ioCxt;
}
-
}
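
The new clear() above delegates to IOContext.clear() (added later in this patch), which removes the thread-local IOContext. The converted fetch presumably runs in the caller's thread rather than in a fresh map task, so per-query state held in the thread-local must not leak into the next query on the same thread. The sketch below only illustrates the standard ThreadLocal semantics being relied on; the class and field names are illustrative, not Hive APIs:

public class ThreadLocalClearExample {
  // mirrors the IOContext.threadLocal pattern: one mutable context object per thread
  private static final ThreadLocal<StringBuilder> CTX = new ThreadLocal<StringBuilder>() {
    @Override
    protected StringBuilder initialValue() {
      return new StringBuilder();
    }
  };

  public static void main(String[] args) {
    CTX.get().append("state of query 1");
    System.out.println(CTX.get());  // prints: state of query 1

    CTX.remove();                   // what IOContext.clear() does for the current thread
    System.out.println(CTX.get());  // prints an empty, freshly created instance
  }
}
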
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
index 4af7a13..7ecdabf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
@@ -35,7 +35,12 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.io.HiveRecordReader;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.parse.SplitSample;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -46,6 +51,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
@@ -66,40 +72,82 @@ public class FetchOperator implements Serializable {
private boolean isNativeTable;
private FetchWork work;
+ private Operator> operator; // optional operator tree for processing rows further
private int splitNum;
private PartitionDesc currPart;
private TableDesc currTbl;
private boolean tblDataDone;
+ private boolean hasVC;
+ private boolean isPartitioned;
+ private StructObjectInspector vcsOI;
+ private List vcCols;
+ private ExecMapperContext context;
+
private transient RecordReader currRecReader;
- private transient InputSplit[] inputSplits;
+ private transient FetchInputFormatSplit[] inputSplits;
private transient InputFormat inputFormat;
private transient JobConf job;
private transient WritableComparable key;
private transient Writable value;
+ private transient Writable[] vcValues;
private transient Deserializer serde;
private transient Iterator iterPath;
private transient Iterator iterPartDesc;
private transient Path currPath;
private transient StructObjectInspector rowObjectInspector;
- private transient Object[] rowWithPart;
+ private transient Object[] row;
+
public FetchOperator() {
}
public FetchOperator(FetchWork work, JobConf job) {
+ this.job = job;
this.work = work;
- initialize(job);
+ initialize();
}
- public void initialize(JobConf job) {
+ public FetchOperator(FetchWork work, JobConf job, Operator> operator,
+ List vcCols) {
this.job = job;
+ this.work = work;
+ this.operator = operator;
+ this.vcCols = vcCols;
+ initialize();
+ }
+
+ private void initialize() {
+ if (hasVC = vcCols != null && !vcCols.isEmpty()) {
+ List names = new ArrayList(vcCols.size());
+ List inspectors = new ArrayList(vcCols.size());
+ for (VirtualColumn vc : vcCols) {
+ inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ vc.getTypeInfo().getPrimitiveCategory()));
+ names.add(vc.getName());
+ }
+ vcsOI = ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
+ vcValues = new Writable[vcCols.size()];
+ }
+ isPartitioned = work.isPartitioned();
tblDataDone = false;
- rowWithPart = new Object[2];
- if (work.getTblDir() != null) {
+ if (hasVC && isPartitioned) {
+ row = new Object[3];
+ } else if (hasVC || isPartitioned) {
+ row = new Object[2];
+ } else {
+ row = new Object[1];
+ }
+ if (work.getTblDesc() != null) {
isNativeTable = !work.getTblDesc().isNonNative();
} else {
isNativeTable = true;
}
+ if (hasVC || work.getSplitSample() != null) {
+ context = new ExecMapperContext();
+ if (operator != null) {
+ operator.setExecContext(context);
+ }
+ }
}
public FetchWork getWork() {
@@ -151,6 +199,7 @@ public class FetchOperator implements Serializable {
*/
private static Map> inputFormats = new HashMap>();
+ @SuppressWarnings("unchecked")
static InputFormat getInputFormatFromCache(Class inputFormatClass,
Configuration conf) throws IOException {
if (!inputFormats.containsKey(inputFormatClass)) {
@@ -166,17 +215,36 @@ public class FetchOperator implements Serializable {
return inputFormats.get(inputFormatClass);
}
- private void setPrtnDesc(TableDesc table, Map partSpec) throws Exception {
+ private StructObjectInspector setPrtnDesc(TableDesc table) throws Exception {
+ Deserializer serde = table.getDeserializerClass().newInstance();
+ serde.initialize(job, table.getProperties());
+ return rowObjectInspector = createRowInspector(serde);
+ }
+
+ private StructObjectInspector setPrtnDesc(TableDesc table,
+ Map partSpec) throws Exception {
+ Deserializer serde = table.getDeserializerClass().newInstance();
+ serde.initialize(job, table.getProperties());
String pcols = table.getProperties().getProperty(
org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);
String[] partKeys = pcols.trim().split("/");
if (partSpec != null) {
- rowWithPart[1] = createPartValue(partKeys, partSpec);
+ row[1] = createPartValue(partKeys, partSpec);
}
- rowObjectInspector = createRowInspector(partKeys);
+ currPart = null;
+ return rowObjectInspector = createRowInspector(serde, partKeys);
+ }
+
+ private StructObjectInspector createRowInspector(Deserializer serde) throws SerDeException {
+ StructObjectInspector inspector = (StructObjectInspector) serde.getObjectInspector();
+
+ return hasVC ? ObjectInspectorFactory.repackUnionStructObjectInspector(
+ (UnionStructObjectInspector) rowObjectInspector, Arrays.asList(inspector, vcsOI))
+ : inspector;
}
- private StructObjectInspector createRowInspector(String[] partKeys) throws SerDeException {
+ private StructObjectInspector createRowInspector(Deserializer serde, String[] partKeys)
+ throws SerDeException {
List partNames = new ArrayList();
List partObjectInspectors = new ArrayList();
for (String key : partKeys) {
@@ -187,8 +255,15 @@ public class FetchOperator implements Serializable {
.getStandardStructObjectInspector(partNames, partObjectInspectors);
StructObjectInspector inspector = (StructObjectInspector) serde.getObjectInspector();
- return ObjectInspectorFactory.getUnionStructObjectInspector(
- Arrays.asList(inspector, partObjectInspector));
+ // The TableScanOperator in FetchTask (the source) is initialized with the first output OI,
+ // acquired from FetchOperator#getOutputObjectInspector. For a partitioned table this OI can
+ // change at runtime, and the changed output OI is returned together with the row to FetchTask.
+ // For the operator tree, however, OIs cannot be forwarded along with the row,
+ // so the inside of rowObjectInspector is simply repacked with the changed OIs.
+ return ObjectInspectorFactory.repackUnionStructObjectInspector(
+ (UnionStructObjectInspector) rowObjectInspector,
+ hasVC ? Arrays.asList(inspector, partObjectInspector, vcsOI) :
+ Arrays.asList(inspector, partObjectInspector));
}
private List createPartValue(String[] partKeys, Map partSpec) {
@@ -202,7 +277,7 @@ public class FetchOperator implements Serializable {
private void getNextPath() throws Exception {
// first time
if (iterPath == null) {
- if (work.getTblDir() != null) {
+ if (work.isNotPartitioned()) {
if (!tblDataDone) {
currPath = work.getTblDirPath();
currTbl = work.getTblDesc();
@@ -280,9 +355,19 @@ public class FetchOperator implements Serializable {
tmp = new PartitionDesc(currTbl, null);
}
- inputFormat = getInputFormatFromCache(tmp.getInputFileFormatClass(), job);
+ Class extends InputFormat> formatter = tmp.getInputFileFormatClass();
+ inputFormat = getInputFormatFromCache(formatter, job);
Utilities.copyTableJobPropertiesToConf(tmp.getTableDesc(), job);
- inputSplits = inputFormat.getSplits(job, 1);
+ InputSplit[] splits = inputFormat.getSplits(job, 1);
+ FetchInputFormatSplit[] inputSplits = new FetchInputFormatSplit[splits.length];
+ for (int i = 0; i < splits.length; i++) {
+ inputSplits[i] = new FetchInputFormatSplit(splits[i], formatter.getName());
+ }
+ if (work.getSplitSample() != null) {
+ inputSplits = splitSampling(work.getSplitSample(), inputSplits);
+ }
+ this.inputSplits = inputSplits;
+
splitNum = 0;
serde = tmp.getDeserializerClass().newInstance();
serde.initialize(job, tmp.getProperties());
@@ -307,12 +392,74 @@ public class FetchOperator implements Serializable {
return getRecordReader();
}
- currRecReader = inputFormat.getRecordReader(inputSplits[splitNum++], job, Reporter.NULL);
+ final FetchInputFormatSplit target = inputSplits[splitNum];
+
+ @SuppressWarnings("unchecked")
+ final RecordReader reader =
+ inputFormat.getRecordReader(target.getInputSplit(), job, Reporter.NULL);
+ if (hasVC || work.getSplitSample() != null) {
+ currRecReader = new HiveRecordReader(reader, job) {
+ @Override
+ public boolean doNext(WritableComparable key, Writable value) throws IOException {
+ // if the current position is larger than the shrinkedLength calculated for
+ // this split by table sampling, stop fetching any more rows (early exit)
+ if (target.shrinkedLength > 0 &&
+ context.getIoCxt().getCurrentBlockStart() > target.shrinkedLength) {
+ return false;
+ }
+ return super.doNext(key, value);
+ }
+ };
+ ((HiveContextAwareRecordReader)currRecReader).
+ initIOContext(target, job, inputFormat.getClass(), reader);
+ } else {
+ currRecReader = reader;
+ }
+ splitNum++;
key = currRecReader.createKey();
value = currRecReader.createValue();
return currRecReader;
}
+ private FetchInputFormatSplit[] splitSampling(SplitSample splitSample,
+ FetchInputFormatSplit[] splits) {
+ long totalSize = 0;
+ for (FetchInputFormatSplit split: splits) {
+ totalSize += split.getLength();
+ }
+ List result = new ArrayList();
+ long targetSize = (long) (totalSize * splitSample.getPercent() / 100D);
+ int startIndex = splitSample.getSeedNum() % splits.length;
+ long size = 0;
+ for (int i = 0; i < splits.length; i++) {
+ FetchInputFormatSplit split = splits[(startIndex + i) % splits.length];
+ result.add(split);
+ long splitLength = split.getLength();
+ if (size + splitLength >= targetSize) {
+ if (size + splitLength > targetSize) {
+ split.shrinkedLength = targetSize - size;
+ }
+ break;
+ }
+ size += splitLength;
+ }
+ return result.toArray(new FetchInputFormatSplit[result.size()]);
+ }
+
+ /**
+ * Get the next row and push it down to the operator tree.
+ * Currently only used by FetchTask.
+ **/
+ public boolean pushRow() throws IOException, HiveException {
+ InspectableObject row = getNextRow();
+ if (row != null) {
+ operator.process(row.o, 0);
+ }
+ return row != null;
+ }
+
+ private transient final InspectableObject inspectable = new InspectableObject();
+
/**
* Get the next row. The fetch context is modified appropriately.
*
@@ -320,6 +467,9 @@ public class FetchOperator implements Serializable {
public InspectableObject getNextRow() throws IOException {
try {
while (true) {
+ if (context != null) {
+ context.resetRow();
+ }
if (currRecReader == null) {
currRecReader = getRecordReader();
if (currRecReader == null) {
@@ -329,13 +479,27 @@ public class FetchOperator implements Serializable {
boolean ret = currRecReader.next(key, value);
if (ret) {
- if (this.currPart == null) {
- Object obj = serde.deserialize(value);
- return new InspectableObject(obj, serde.getObjectInspector());
- } else {
- rowWithPart[0] = serde.deserialize(value);
- return new InspectableObject(rowWithPart, rowObjectInspector);
+ if (operator != null && context != null && context.inputFileChanged()) {
+ // The child operators cleanup if input file has changed
+ try {
+ operator.cleanUpInputFileChanged();
+ } catch (HiveException e) {
+ throw new IOException(e);
+ }
+ }
+ if (hasVC) {
+ vcValues = MapOperator.populateVirtualColumnValues(context, vcCols, vcValues, serde);
+ row[isPartitioned ? 2 : 1] = vcValues;
}
+ row[0] = serde.deserialize(value);
+ if (hasVC || isPartitioned) {
+ inspectable.o = row;
+ inspectable.oi = rowObjectInspector;
+ return inspectable;
+ }
+ inspectable.o = row[0];
+ inspectable.oi = serde.getObjectInspector();
+ return inspectable;
} else {
currRecReader.close();
currRecReader = null;
@@ -356,6 +520,14 @@ public class FetchOperator implements Serializable {
currRecReader.close();
currRecReader = null;
}
+ if (operator != null) {
+ operator.close(false);
+ operator = null;
+ }
+ if (context != null) {
+ context.clear();
+ context = null;
+ }
this.currPath = null;
this.iterPath = null;
this.iterPartDesc = null;
@@ -373,7 +545,7 @@ public class FetchOperator implements Serializable {
this.iterPath = iterPath;
this.iterPartDesc = iterPartDesc;
if (iterPartDesc == null) {
- if (work.getTblDir() != null) {
+ if (work.isNotPartitioned()) {
this.currTbl = work.getTblDesc();
} else {
// hack, get the first.
@@ -388,28 +560,15 @@ public class FetchOperator implements Serializable {
*/
public ObjectInspector getOutputObjectInspector() throws HiveException {
try {
- if (work.getTblDir() != null) {
- TableDesc tbl = work.getTblDesc();
- Deserializer serde = tbl.getDeserializerClass().newInstance();
- serde.initialize(job, tbl.getProperties());
- return serde.getObjectInspector();
+ if (work.isNotPartitioned()) {
+ return setPrtnDesc(work.getTblDesc());
}
- TableDesc tbl;
- Map partSpec;
List listParts = work.getPartDesc();
if (listParts == null || listParts.isEmpty()) {
- tbl = work.getTblDesc();
- partSpec = null;
- } else {
- currPart = listParts.get(0);
- tbl = currPart.getTableDesc();
- partSpec = currPart.getPartSpec();
+ return setPrtnDesc(work.getTblDesc(), null);
}
- serde = tbl.getDeserializerClass().newInstance();
- serde.initialize(job, tbl.getProperties());
- setPrtnDesc(tbl, partSpec);
- currPart = null;
- return rowObjectInspector;
+ PartitionDesc currPart = listParts.get(0);
+ return setPrtnDesc(currPart.getTableDesc(), currPart.getPartSpec());
} catch (Exception e) {
throw new HiveException("Failed with exception " + e.getMessage()
+ org.apache.hadoop.util.StringUtils.stringifyException(e));
@@ -440,4 +599,20 @@ public class FetchOperator implements Serializable {
}
return results.toArray(new FileStatus[results.size()]);
}
+
+ // for split sampling. shrinkedLength is checked against IOContext.getCurrentBlockStart,
+ // which comes from RecordReader.getPos(). So input formats that do not support getPos(),
+ // like HiveHBaseTableInputFormat, cannot be used with this (todo)
+ private static class FetchInputFormatSplit extends HiveInputFormat.HiveInputSplit {
+
+ // shrinked size for this split. The counterpart of this in normal mode is
+ // InputSplitShim.shrinkedLength.
+ // The difference is that this is evaluated per row using RecordReader.getPos(),
+ // whereas that is evaluated per split using InputSplit.getLength().
+ private long shrinkedLength = -1;
+
+ public FetchInputFormatSplit(InputSplit split, String name) {
+ super(split, name);
+ }
+ }
}
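
To make the sampling arithmetic in splitSampling() above concrete, here is a standalone recomputation of targetSize and of the shrinked length assigned to the last selected split, using the same formulas but plain longs; the split sizes, percentage and seed are made up for illustration:

public class SplitSamplingMath {
  public static void main(String[] args) {
    long[] splitLengths = {100L, 100L, 100L};  // hypothetical split sizes in bytes
    double percent = 50.0;                     // e.g. TABLESAMPLE (50 PERCENT)
    int seedNum = 0;                           // start index = seedNum % number of splits

    long totalSize = 0;
    for (long length : splitLengths) {
      totalSize += length;
    }
    long targetSize = (long) (totalSize * percent / 100D);  // 150 of 300 bytes

    long size = 0;
    for (int i = 0; i < splitLengths.length; i++) {
      long splitLength = splitLengths[(seedNum + i) % splitLengths.length];
      if (size + splitLength >= targetSize) {
        if (size + splitLength > targetSize) {
          // the second split is read only up to byte 50, then doNext() stops early
          System.out.println("last split shrinkedLength = " + (targetSize - size));
        }
        break;
      }
      size += splitLength;
    }
    System.out.println("targetSize = " + targetSize + " out of " + totalSize);
  }
}
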
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
index f8373a3..c76c37d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
@@ -21,27 +21,24 @@ package org.apache.hadoop.hive.ql.exec;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
-import java.util.Properties;
+import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.QueryPlan;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.api.StageType;
-import org.apache.hadoop.hive.serde.Constants;
-import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
-import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
/**
@@ -51,9 +48,10 @@ public class FetchTask extends Task implements Serializable {
private static final long serialVersionUID = 1L;
private int maxRows = 100;
- private FetchOperator ftOp;
- private SerDe mSerde;
+ private FetchOperator fetch;
+ private ListSinkOperator sink;
private int totalRows;
+
private static transient final Log LOG = LogFactory.getLog(FetchTask.class);
public FetchTask() {
@@ -63,28 +61,22 @@ public class FetchTask extends Task implements Serializable {
@Override
public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) {
super.initialize(conf, queryPlan, ctx);
+ work.initializeForFetch();
try {
// Create a file system handle
JobConf job = new JobConf(conf, ExecDriver.class);
- String serdeName = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEFETCHOUTPUTSERDE);
- Class extends SerDe> serdeClass = Class.forName(serdeName, true,
- JavaUtils.getClassLoader()).asSubclass(SerDe.class);
- // cast only needed for Hadoop 0.17 compatibility
- mSerde = (SerDe) ReflectionUtils.newInstance(serdeClass, null);
-
- Properties serdeProp = new Properties();
-
- // this is the default serialization format
- if (mSerde instanceof DelimitedJSONSerDe) {
- serdeProp.put(Constants.SERIALIZATION_FORMAT, "" + Utilities.tabCode);
- serdeProp.put(Constants.SERIALIZATION_NULL_FORMAT, work.getSerializationNullFormat());
+ Operator> source = work.getSource();
+ if (source instanceof TableScanOperator) {
+ TableScanOperator ts = (TableScanOperator) source;
+ HiveInputFormat.pushFilters(job, ts);
+ ColumnProjectionUtils.appendReadColumnIDs(job, ts.getNeededColumnIDs());
}
+ sink = work.getSink();
+ fetch = new FetchOperator(work, job, source, getVirtualColumns(source));
+ source.initialize(conf, new ObjectInspector[]{fetch.getOutputObjectInspector()});
- mSerde.initialize(job, serdeProp);
-
- ftOp = new FetchOperator(work, job);
} catch (Exception e) {
// Bail out ungracefully - we should never hit
// this here - but would have hit it in SemanticAnalyzer
@@ -93,6 +85,13 @@ public class FetchTask extends Task implements Serializable {
}
}
+ private List getVirtualColumns(Operator> ts) {
+ if (ts instanceof TableScanOperator && ts.getConf() != null) {
+ return ((TableScanOperator)ts).getConf().getVirtualCols();
+ }
+ return null;
+ }
+
@Override
public int execute(DriverContext driverContext) {
assert false;
@@ -122,48 +121,26 @@ public class FetchTask extends Task implements Serializable {
@Override
public boolean fetch(ArrayList res) throws IOException, CommandNeedRetryException {
+ sink.reset(res);
try {
- int numRows = 0;
- int rowsRet = maxRows;
-
- if (work.getLeastNumRows() > 0) {
- if (totalRows == work.getLeastNumRows()) {
- return false;
- }
- for (int i = 0; i < work.getLeastNumRows(); i++) {
- InspectableObject io = ftOp.getNextRow();
- if (io == null) {
- throw new CommandNeedRetryException();
- }
- res.add(((Text) mSerde.serialize(io.o, io.oi)).toString());
- numRows++;
- }
- totalRows = work.getLeastNumRows();
- return true;
- }
-
- if ((work.getLimit() >= 0) && ((work.getLimit() - totalRows) < rowsRet)) {
- rowsRet = work.getLimit() - totalRows;
+ int rowsRet = work.getLeastNumRows();
+ if (rowsRet <= 0) {
+ rowsRet = work.getLimit() >= 0 ? Math.min(work.getLimit() - totalRows, maxRows) : maxRows;
}
if (rowsRet <= 0) {
- ftOp.clearFetchContext();
+ fetch.clearFetchContext();
return false;
}
-
- while (numRows < rowsRet) {
- InspectableObject io = ftOp.getNextRow();
- if (io == null) {
- if (numRows == 0) {
- return false;
+ boolean fetched = false;
+ while (sink.getNumRows() < rowsRet) {
+ if (!fetch.pushRow()) {
+ if (work.getLeastNumRows() > 0) {
+ throw new CommandNeedRetryException();
}
- totalRows += numRows;
- return true;
+ return fetched;
}
-
- res.add(((Text) mSerde.serialize(io.o, io.oi)).toString());
- numRows++;
+ fetched = true;
}
- totalRows += numRows;
return true;
} catch (CommandNeedRetryException e) {
throw e;
@@ -171,6 +148,8 @@ public class FetchTask extends Task implements Serializable {
throw e;
} catch (Exception e) {
throw new IOException(e);
+ } finally {
+ totalRows += sink.getNumRows();
}
}
@@ -203,8 +182,8 @@ public class FetchTask extends Task implements Serializable {
* @throws HiveException
*/
public void clearFetch() throws HiveException {
- if (null != ftOp) {
- ftOp.clearFetchContext();
+ if (fetch != null) {
+ fetch.clearFetchContext();
}
}
}
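
The reworked fetch() above first derives how many rows to pull in this batch from leastNumRows, the query LIMIT and maxRows, and only then drives pushRow() until the sink has collected that many. A standalone restatement of just that batch-size arithmetic, with hypothetical values:

public class FetchBatchSizeExample {
  // mirrors the rowsRet computation at the top of the new FetchTask.fetch()
  static int batchSize(int leastNumRows, int limit, int totalRows, int maxRows) {
    int rowsRet = leastNumRows;
    if (rowsRet <= 0) {
      rowsRet = limit >= 0 ? Math.min(limit - totalRows, maxRows) : maxRows;
    }
    return rowsRet;  // a value <= 0 means the fetch context is cleared and fetch() returns false
  }

  public static void main(String[] args) {
    System.out.println(batchSize(0, -1, 0, 100));  // no LIMIT: full batch of 100
    System.out.println(batchSize(0, 10, 0, 100));  // LIMIT 10: only 10 rows requested
    System.out.println(batchSize(0, 10, 10, 100)); // LIMIT already reached: 0, so stop
    System.out.println(batchSize(5, 10, 0, 100));  // leastNumRows takes precedence: 5
  }
}
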
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java
new file mode 100644
index 0000000..9d9a352
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.util.ArrayList;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ListSinkDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/**
+ * For a fetch task with an operator tree, each row read from FetchOperator is processed through
+ * the operator tree and finally arrives at this operator.
+ */
+public class ListSinkOperator extends Operator {
+
+ private transient SerDe mSerde;
+
+ private transient ArrayList res;
+ private transient int numRows;
+
+ @Override
+ protected void initializeOp(Configuration hconf) throws HiveException {
+ try {
+ mSerde = initializeSerde(hconf);
+ initializeChildren(hconf);
+ } catch (Exception e) {
+ throw new HiveException(e);
+ }
+ }
+
+ private SerDe initializeSerde(Configuration conf) throws Exception {
+ String serdeName = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEFETCHOUTPUTSERDE);
+ Class extends SerDe> serdeClass = Class.forName(serdeName, true,
+ JavaUtils.getClassLoader()).asSubclass(SerDe.class);
+ // cast only needed for Hadoop 0.17 compatibility
+ SerDe serde = ReflectionUtils.newInstance(serdeClass, null);
+
+ Properties serdeProp = new Properties();
+
+ // this is the default serialization format
+ if (serde instanceof DelimitedJSONSerDe) {
+ serdeProp.put(Constants.SERIALIZATION_FORMAT, "" + Utilities.tabCode);
+ serdeProp.put(Constants.SERIALIZATION_NULL_FORMAT, getConf().getSerializationNullFormat());
+ }
+ serde.initialize(conf, serdeProp);
+ return serde;
+ }
+
+ public ListSinkOperator initialize(SerDe mSerde) {
+ this.mSerde = mSerde;
+ return this;
+ }
+
+ public void reset(ArrayList res) {
+ this.res = res;
+ this.numRows = 0;
+ }
+
+ public int getNumRows() {
+ return numRows;
+ }
+
+ public void processOp(Object row, int tag) throws HiveException {
+ try {
+ res.add(mSerde.serialize(row, outputObjInspector).toString());
+ numRows++;
+ } catch (SerDeException e) {
+ throw new HiveException(e);
+ }
+ }
+
+ public OperatorType getType() {
+ return OperatorType.FORWARD;
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
index 2553931..dd4571e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
@@ -33,7 +33,6 @@ import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.io.IOContext;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.MapredWork;
@@ -494,13 +493,16 @@ public class MapOperator extends Operator implements Serializable {
// The child operators cleanup if input file has changed
cleanUpInputFileChanged();
}
+ ExecMapperContext context = getExecContext();
Object row = null;
try {
if (this.hasVC) {
this.rowWithPartAndVC[0] = deserializer.deserialize(value);
int vcPos = isPartitioned ? 2 : 1;
- populateVirtualColumnValues();
+ if (context != null) {
+ populateVirtualColumnValues(context, vcs, vcValues, deserializer);
+ }
this.rowWithPartAndVC[vcPos] = this.vcValues;
} else if (!isPartitioned) {
row = deserializer.deserialize((Writable) value);
@@ -549,54 +551,60 @@ public class MapOperator extends Operator implements Serializable {
}
}
- private void populateVirtualColumnValues() {
- if (this.vcs != null) {
- ExecMapperContext mapExecCxt = this.getExecContext();
- IOContext ioCxt = mapExecCxt.getIoCxt();
- for (int i = 0; i < vcs.size(); i++) {
- VirtualColumn vc = vcs.get(i);
- if (vc.equals(VirtualColumn.FILENAME) && mapExecCxt.inputFileChanged()) {
- this.vcValues[i] = new Text(mapExecCxt.getCurrentInputFile());
- } else if (vc.equals(VirtualColumn.BLOCKOFFSET)) {
- long current = ioCxt.getCurrentBlockStart();
- LongWritable old = (LongWritable) this.vcValues[i];
- if (old == null) {
- old = new LongWritable(current);
- this.vcValues[i] = old;
- continue;
- }
- if (current != old.get()) {
- old.set(current);
- }
- } else if (vc.equals(VirtualColumn.ROWOFFSET)) {
- long current = ioCxt.getCurrentRow();
- LongWritable old = (LongWritable) this.vcValues[i];
- if (old == null) {
- old = new LongWritable(current);
- this.vcValues[i] = old;
- continue;
- }
- if (current != old.get()) {
- old.set(current);
- }
- } else if (vc.equals(VirtualColumn.RAWDATASIZE)) {
- long current = 0L;
- SerDeStats stats = this.deserializer.getSerDeStats();
- if(stats != null) {
- current = stats.getRawDataSize();
- }
- LongWritable old = (LongWritable) this.vcValues[i];
- if (old == null) {
- old = new LongWritable(current);
- this.vcValues[i] = old;
- continue;
- }
- if (current != old.get()) {
- old.set(current);
- }
+ public static Writable[] populateVirtualColumnValues(ExecMapperContext ctx,
+ List vcs, Writable[] vcValues, Deserializer deserializer) {
+ if (vcs == null) {
+ return vcValues;
+ }
+ if (vcValues == null) {
+ vcValues = new Writable[vcs.size()];
+ }
+ for (int i = 0; i < vcs.size(); i++) {
+ VirtualColumn vc = vcs.get(i);
+ if (vc.equals(VirtualColumn.FILENAME)) {
+ if (ctx.inputFileChanged()) {
+ vcValues[i] = new Text(ctx.getCurrentInputFile());
+ }
+ } else if (vc.equals(VirtualColumn.BLOCKOFFSET)) {
+ long current = ctx.getIoCxt().getCurrentBlockStart();
+ LongWritable old = (LongWritable) vcValues[i];
+ if (old == null) {
+ old = new LongWritable(current);
+ vcValues[i] = old;
+ continue;
+ }
+ if (current != old.get()) {
+ old.set(current);
+ }
+ } else if (vc.equals(VirtualColumn.ROWOFFSET)) {
+ long current = ctx.getIoCxt().getCurrentRow();
+ LongWritable old = (LongWritable) vcValues[i];
+ if (old == null) {
+ old = new LongWritable(current);
+ vcValues[i] = old;
+ continue;
+ }
+ if (current != old.get()) {
+ old.set(current);
+ }
+ } else if (vc.equals(VirtualColumn.RAWDATASIZE)) {
+ long current = 0L;
+ SerDeStats stats = deserializer.getSerDeStats();
+ if(stats != null) {
+ current = stats.getRawDataSize();
+ }
+ LongWritable old = (LongWritable) vcValues[i];
+ if (old == null) {
+ old = new LongWritable(current);
+ vcValues[i] = old;
+ continue;
+ }
+ if (current != old.get()) {
+ old.set(current);
}
}
}
+ return vcValues;
}
@Override
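
populateVirtualColumnValues() is now a static helper shared by MapOperator and the new FetchOperator path. One detail worth seeing in isolation is the Writable-reuse pattern it keeps: the LongWritable for the offset virtual columns is allocated once and mutated afterwards, so no per-row garbage is created. A small sketch of that pattern using only hadoop-common types (the offsets are made up):

import org.apache.hadoop.io.LongWritable;

public class WritableReuseExample {
  public static void main(String[] args) {
    LongWritable[] vcValues = new LongWritable[1];
    long[] observedOffsets = {0L, 128L, 128L, 256L};

    for (long current : observedOffsets) {
      LongWritable old = vcValues[0];
      if (old == null) {
        vcValues[0] = new LongWritable(current);  // first row: allocate once
      } else if (current != old.get()) {
        old.set(current);                         // later rows: mutate in place
      }
      System.out.println(vcValues[0]);
    }
  }
}
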
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 08362a6..346d938 100755
--- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -339,7 +339,7 @@ public class HiveInputFormat
return partDesc;
}
- protected void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
+ public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
TableScanDesc scanDesc = tableScan.getConf();
if (scanDesc == null) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java
index 2e30af8..1ca530c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java
@@ -39,6 +39,10 @@ public class IOContext {
return IOContext.threadLocal.get();
}
+ public static void clear() {
+ IOContext.threadLocal.remove();
+ }
+
long currentBlockStart;
long nextBlockStart;
long currentRow;
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 94a5037..67d3a99 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -77,6 +77,7 @@ public class Optimizer {
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVELIMITOPTENABLE)) {
transformations.add(new GlobalLimitOptimizer());
}
+ transformations.add(new SimpleFetchOptimizer()); // must be called last
}
/**
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
new file mode 100644
index 0000000..9d12fc3
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.ListSinkOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.QB;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.SplitSample;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.FetchWork;
+import org.apache.hadoop.hive.ql.plan.ListSinkDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+
+/**
+ * Tries to convert a simple fetch query into a single fetch task, which fetches rows directly
+ * from the location of the table/partition.
+ */
+public class SimpleFetchOptimizer implements Transform {
+
+ private final Log LOG = LogFactory.getLog(SimpleFetchOptimizer.class.getName());
+
+ public ParseContext transform(ParseContext pctx) throws SemanticException {
+ Map> topOps = pctx.getTopOps();
+ if (pctx.getQB().isSimpleSelectQuery() && topOps.size() == 1) {
+ // no join, no groupby, no distinct, no lateral view, no subquery,
+ // no CTAS or insert, not an ANALYZE command, and single sourced.
+ String alias = (String) pctx.getTopOps().keySet().toArray()[0];
+ Operator topOp = (Operator) pctx.getTopOps().values().toArray()[0];
+ if (topOp instanceof TableScanOperator) {
+ try {
+ FetchTask fetchTask = optimize(pctx, alias, (TableScanOperator) topOp);
+ if (fetchTask != null) {
+ pctx.setFetchTask(fetchTask);
+ }
+ } catch (HiveException e) {
+ // Has to use full name to make sure it does not conflict with
+ // org.apache.commons.lang.StringUtils
+ LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+ if (e instanceof SemanticException) {
+ throw (SemanticException) e;
+ }
+ throw new SemanticException(e.getMessage(), e);
+ }
+ }
+ }
+ return pctx;
+ }
+
+ // returns a non-null FetchTask instance when the conversion succeeded
+ @SuppressWarnings("unchecked")
+ private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator source)
+ throws HiveException {
+ String mode = HiveConf.getVar(
+ pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSION);
+
+ boolean aggressive = "more".equals(mode);
+ FetchData fetch = checkTree(aggressive, pctx, alias, source);
+ if (fetch != null) {
+ int limit = pctx.getQB().getParseInfo().getOuterQueryLimit();
+ FetchWork fetchWork = fetch.convertToWork();
+ FetchTask fetchTask = (FetchTask) TaskFactory.get(fetchWork, pctx.getConf());
+ fetchWork.setSink(fetch.completed(pctx, fetchWork));
+ fetchWork.setSource(source);
+ fetchWork.setLimit(limit);
+ return fetchTask;
+ }
+ return null;
+ }
+
+ // all we can handle is LimitOperator, FilterOperator, SelectOperator and the final FS
+ //
+ // for non-aggressive mode (minimal)
+ // 1. sampling is not allowed
+ // 2. for a partitioned table, all filters should target partition columns only
+ // 3. SelectOperator should be SELECT STAR
+ private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias,
+ TableScanOperator ts) throws HiveException {
+ SplitSample splitSample = pctx.getNameToSplitSample().get(alias);
+ if (!aggressive && splitSample != null) {
+ return null;
+ }
+ QB qb = pctx.getQB();
+ if (!aggressive && qb.hasTableSample(alias)) {
+ return null;
+ }
+
+ Table table = qb.getMetaData().getAliasToTable().get(alias);
+ if (table == null) {
+ return null;
+ }
+ if (!table.isPartitioned()) {
+ return checkOperators(new FetchData(table, splitSample), ts, aggressive, false);
+ }
+
+ boolean bypassFilter = false;
+ if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) {
+ ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts);
+ bypassFilter = PartitionPruner.onlyContainsPartnCols(table, pruner);
+ }
+ if (aggressive || bypassFilter) {
+ PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts);
+ if (aggressive || pruned.getUnknownPartns().isEmpty()) {
+ bypassFilter &= pruned.getUnknownPartns().isEmpty();
+ return checkOperators(new FetchData(pruned, splitSample), ts, aggressive, bypassFilter);
+ }
+ }
+ return null;
+ }
+
+ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean aggressive,
+ boolean bypassFilter) {
+ if (ts.getChildOperators().size() != 1) {
+ return null;
+ }
+ Operator> op = ts.getChildOperators().get(0);
+ for (; ; op = op.getChildOperators().get(0)) {
+ if (aggressive) {
+ if (!(op instanceof LimitOperator || op instanceof FilterOperator
+ || op instanceof SelectOperator)) {
+ break;
+ }
+ } else if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter)
+ || (op instanceof SelectOperator && ((SelectOperator) op).getConf().isSelectStar()))) {
+ break;
+ }
+ if (op.getChildOperators() == null || op.getChildOperators().size() != 1) {
+ return null;
+ }
+ }
+ if (op instanceof FileSinkOperator) {
+ fetch.fileSink = op;
+ return fetch;
+ }
+ return null;
+ }
+
+ private class FetchData {
+
+ private final Table table;
+ private final SplitSample splitSample;
+ private final PrunedPartitionList partsList;
+ private final HashSet inputs = new HashSet();
+
+ // this is always non-null when conversion is completed
+ private Operator> fileSink;
+
+ private FetchData(Table table, SplitSample splitSample) {
+ this.table = table;
+ this.partsList = null;
+ this.splitSample = splitSample;
+ }
+
+ private FetchData(PrunedPartitionList partsList, SplitSample splitSample) {
+ this.table = null;
+ this.partsList = partsList;
+ this.splitSample = splitSample;
+ }
+
+ private FetchWork convertToWork() throws HiveException {
+ inputs.clear();
+ if (table != null) {
+ inputs.add(new ReadEntity(table));
+ String path = table.getPath().toString();
+ FetchWork work = new FetchWork(path, Utilities.getTableDesc(table));
+ PlanUtils.configureInputJobPropertiesForStorageHandler(work.getTblDesc());
+ work.setSplitSample(splitSample);
+ return work;
+ }
+ List listP = new ArrayList();
+ List partP = new ArrayList();
+
+ for (Partition partition : partsList.getNotDeniedPartns()) {
+ inputs.add(new ReadEntity(partition));
+ listP.add(partition.getPartitionPath().toString());
+ partP.add(Utilities.getPartitionDesc(partition));
+ }
+ TableDesc table = Utilities.getTableDesc(partsList.getSourceTable());
+ FetchWork work = new FetchWork(listP, partP, table);
+ if (!work.getPartDesc().isEmpty()) {
+ PartitionDesc part0 = work.getPartDesc().get(0);
+ PlanUtils.configureInputJobPropertiesForStorageHandler(part0.getTableDesc());
+ work.setSplitSample(splitSample);
+ }
+ return work;
+ }
+
+ // this optimizer replaces "FS to temp + fetch from temp" with a single direct fetch,
+ // which means the FS is no longer needed once the conversion is completed.
+ // forwarded rows will be received by the ListSinkOperator that replaces the FS
+ private ListSinkOperator completed(ParseContext pctx, FetchWork work) {
+ pctx.getSemanticInputs().addAll(inputs);
+ ListSinkOperator sink = new ListSinkOperator();
+ sink.setConf(new ListSinkDesc(work.getSerializationNullFormat()));
+ sink.setParentOperators(new ArrayList>());
+ Operator extends Serializable> parent = fileSink.getParentOperators().get(0);
+ sink.getParentOperators().add(parent);
+ parent.replaceChild(fileSink, sink);
+ fileSink.setParentOperators(null);
+ return sink;
+ }
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
index 9e67a85..8d7bac2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
@@ -21,7 +21,7 @@ package org.apache.hadoop.hive.ql.parse;
import org.apache.hadoop.hive.ql.plan.LimitDesc;
/**
- *
+ * Context for pruning inputs. Populated by GlobalLimitOptimizer.
*/
public class GlobalLimitCtx {
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index ea5e3de..0bdc818 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -29,6 +29,7 @@ import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -37,7 +38,9 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.hooks.LineageInfo;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
@@ -100,6 +103,8 @@ public class ParseContext {
private HashSet semanticInputs;
private List> rootTasks;
+ private FetchTask fetchTask;
+
public ParseContext() {
}
@@ -533,4 +538,23 @@ public class ParseContext {
this.rootTasks.remove(rootTask);
this.rootTasks.addAll(tasks);
}
+
+ public FetchTask getFetchTask() {
+ return fetchTask;
+ }
+
+ public void setFetchTask(FetchTask fetchTask) {
+ this.fetchTask = fetchTask;
+ }
+
+ public PrunedPartitionList getPrunedPartitions(String alias, TableScanOperator ts)
+ throws HiveException {
+ PrunedPartitionList partsList = opToPartList.get(ts);
+ if (partsList == null) {
+ partsList = PartitionPruner.prune(topToTable.get(ts),
+ opToPartPruner.get(ts), conf, alias, prunedPartitions);
+ opToPartList.put(ts, partsList);
+ }
+ return partsList;
+ }
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
index cefc274..bfc9835 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
@@ -92,8 +92,8 @@ public class PrunedPartitionList {
*/
public List getNotDeniedPartns() {
List partitions = new ArrayList();
- partitions.addAll(unknownPartns);
partitions.addAll(confirmedPartns);
+ partitions.addAll(unknownPartns);
return partitions;
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
index fa4f71d..413a04d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
@@ -183,8 +183,13 @@ public class QB {
return isQuery;
}
- public boolean isSelectStarQuery() {
- return qbp.isSelectStarQuery() && aliasToSubq.isEmpty() && !isCTAS() && !qbp.isAnalyzeCommand();
+ public boolean isSimpleSelectQuery() {
+ return qbp.isSimpleSelectQuery() && aliasToSubq.isEmpty() && !isCTAS() &&
+ !qbp.isAnalyzeCommand();
+ }
+
+ public boolean hasTableSample(String alias) {
+ return qbp.getTabSample(alias) != null;
}
public CreateTableDesc getTableDesc() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
index f1b749d..5624bef 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
@@ -376,8 +376,8 @@ public class QBParseInfo {
this.outerQueryLimit = outerQueryLimit;
}
- public boolean isSelectStarQuery() {
- if (isSubQ || (joinExpr != null) || (!nameToSample.isEmpty())
+ public boolean isSimpleSelectQuery() {
+ if (isSubQ || (joinExpr != null)
|| (!destToGroupby.isEmpty()) || (!destToClusterby.isEmpty())
|| (!aliasToLateralViews.isEmpty())) {
return false;
@@ -413,23 +413,6 @@ public class QBParseInfo {
}
}
- iter = destToSelExpr.entrySet().iterator();
- while (iter.hasNext()) {
- Map.Entry entry = iter.next();
- ASTNode selExprList = entry.getValue();
- // Iterate over the selects
- for (int i = 0; i < selExprList.getChildCount(); ++i) {
-
- // list of the columns
- ASTNode selExpr = (ASTNode) selExprList.getChild(i);
- ASTNode sel = (ASTNode) selExpr.getChild(0);
-
- if (sel.getToken().getType() != HiveParser.TOK_ALLCOLREF) {
- return false;
- }
- }
- }
-
return true;
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index a53c7d7..562d437 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -110,7 +110,6 @@ import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer;
-import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
@@ -142,7 +141,6 @@ import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
@@ -284,6 +282,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
qb = pctx.getQB();
groupOpToInputTables = pctx.getGroupOpToInputTables();
prunedPartitions = pctx.getPrunedPartitions();
+ fetchTask = pctx.getFetchTask();
setLineageInfo(pctx.getLineageInfo());
}
@@ -6898,95 +6897,16 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
@SuppressWarnings("nls")
- private void genMapRedTasks(QB qb) throws SemanticException {
- FetchWork fetch = null;
- List> mvTask = new ArrayList>();
- FetchTask fetchTask = null;
-
- QBParseInfo qbParseInfo = qb.getParseInfo();
-
- // Does this query need reduce job
- if (qb.isSelectStarQuery() && qbParseInfo.getDestToClusterBy().isEmpty()
- && qbParseInfo.getDestToDistributeBy().isEmpty()
- && qbParseInfo.getDestToOrderBy().isEmpty()
- && qbParseInfo.getDestToSortBy().isEmpty()) {
- boolean noMapRed = false;
-
- Iterator> iter = qb.getMetaData()
- .getAliasToTable().entrySet().iterator();
- Table tab = (iter.next()).getValue();
- if (!tab.isPartitioned()) {
- if (qbParseInfo.getDestToWhereExpr().isEmpty()) {
- fetch = new FetchWork(tab.getPath().toString(), Utilities
- .getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit());
- noMapRed = true;
- inputs.add(new ReadEntity(tab));
- }
- } else {
-
- if (topOps.size() == 1) {
- TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];
-
- // check if the pruner only contains partition columns
- if (PartitionPruner.onlyContainsPartnCols(topToTable.get(ts),
- opToPartPruner.get(ts))) {
-
- PrunedPartitionList partsList = null;
- try {
- partsList = opToPartList.get(ts);
- if (partsList == null) {
- partsList = PartitionPruner.prune(topToTable.get(ts),
- opToPartPruner.get(ts), conf, (String) topOps.keySet()
- .toArray()[0], prunedPartitions);
- opToPartList.put(ts, partsList);
- }
- } catch (HiveException e) {
- // Has to use full name to make sure it does not conflict with
- // org.apache.commons.lang.StringUtils
- LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
- throw new SemanticException(e.getMessage(), e);
- }
-
- // If there is any unknown partition, create a map-reduce job for
- // the filter to prune correctly
- if ((partsList.getUnknownPartns().size() == 0)) {
- List listP = new ArrayList();
- List partP = new ArrayList();
-
- Set parts = partsList.getConfirmedPartns();
- Iterator iterParts = parts.iterator();
- while (iterParts.hasNext()) {
- Partition part = iterParts.next();
-
- listP.add(part.getPartitionPath().toString());
- try {
- partP.add(Utilities.getPartitionDesc(part));
- } catch (HiveException e) {
- throw new SemanticException(e.getMessage(), e);
- }
- inputs.add(new ReadEntity(part));
- }
-
- TableDesc table = Utilities.getTableDesc(partsList.getSourceTable());
- fetch = new FetchWork(listP, partP, table, qb.getParseInfo()
- .getOuterQueryLimit());
- noMapRed = true;
- }
- }
- }
- }
-
- if (noMapRed) {
- PlanUtils.configureInputJobPropertiesForStorageHandler(fetch.getTblDesc());
- fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
- setFetchTask(fetchTask);
-
- // remove root tasks if any
- rootTasks.clear();
- return;
- }
+ private void genMapRedTasks(ParseContext pCtx) throws SemanticException {
+ if (pCtx.getFetchTask() != null) {
+ // replaced by single fetch task
+ init(pCtx);
+ return;
}
+ init(pCtx);
+ List> mvTask = new ArrayList>();
+
// In case of a select, use a fetch task instead of a move task
if (qb.getIsQuery()) {
if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
@@ -6998,10 +6918,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat);
- fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(),
+ FetchWork fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(),
resultTab, qb.getParseInfo().getOuterQueryLimit());
- fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
+ FetchTask fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
setFetchTask(fetchTask);
// For the FetchTask, the limit optimiztion requires we fetch all the rows
@@ -7425,12 +7345,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
optm.setPctx(pCtx);
optm.initialize(conf);
pCtx = optm.optimize();
- init(pCtx);
- qb = pCtx.getQB();
// At this point we have the complete operator tree
// from which we want to find the reduce operator
- genMapRedTasks(qb);
+ genMapRedTasks(pCtx);
LOG.info("Completed plan generation");
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
index e513958..313e772 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
@@ -23,6 +23,9 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ListSinkOperator;
+import org.apache.hadoop.hive.ql.parse.SplitSample;
/**
* FetchWork.
@@ -38,9 +41,14 @@ public class FetchWork implements Serializable {
private ArrayList partDir;
private ArrayList partDesc;
+ private Operator> source;
+ private ListSinkOperator sink;
+
private int limit;
private int leastNumRows;
+ private SplitSample splitSample;
+
/**
* Serialization Null Format for the serde used to fetch data.
*/
@@ -71,6 +79,14 @@ public class FetchWork implements Serializable {
this.limit = limit;
}
+ public void initializeForFetch() {
+ if (source == null) {
+ sink = new ListSinkOperator();
+ sink.setConf(new ListSinkDesc(serializationNullFormat));
+ source = sink;
+ }
+ }
+
public String getSerializationNullFormat() {
return serializationNullFormat;
}
@@ -79,6 +95,14 @@ public class FetchWork implements Serializable {
serializationNullFormat = format;
}
+ public boolean isNotPartitioned() {
+ return tblDir != null;
+ }
+
+ public boolean isPartitioned() {
+ return tblDir == null;
+ }
+
/**
* @return the tblDir
*/
@@ -200,6 +224,31 @@ public class FetchWork implements Serializable {
this.leastNumRows = leastNumRows;
}
+ @Explain(displayName = "Processor Tree")
+ public Operator> getSource() {
+ return source;
+ }
+
+ public void setSource(Operator> source) {
+ this.source = source;
+ }
+
+ public ListSinkOperator getSink() {
+ return sink;
+ }
+
+ public void setSink(ListSinkOperator sink) {
+ this.sink = sink;
+ }
+
+ public void setSplitSample(SplitSample splitSample) {
+ this.splitSample = splitSample;
+ }
+
+ public SplitSample getSplitSample() {
+ return splitSample;
+ }
+
@Override
public String toString() {
if (tblDir != null) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ListSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ListSinkDesc.java
new file mode 100644
index 0000000..8ae3afb
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ListSinkDesc.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+
+/**
+ * description for ListSinkOperator, just for explain result.
+ */
+@Explain(displayName = "ListSink")
+public class ListSinkDesc implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ private String serializationNullFormat = "NULL";
+
+ public ListSinkDesc() {
+ }
+
+ public ListSinkDesc(String serializationNullFormat) {
+ this.serializationNullFormat = serializationNullFormat;
+ }
+
+ public String getSerializationNullFormat() {
+ return serializationNullFormat;
+ }
+
+ public void setSerializationNullFormat(String serializationNullFormat) {
+ this.serializationNullFormat = serializationNullFormat;
+ }
+}
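ListSinkDesc only carries the null format for EXPLAIN and for the sink's output; the operator it describes is essentially an in-memory row buffer that FetchTask drains instead of reading an intermediate result file. A rough sketch of that idea, not the patch's ListSinkOperator:

import java.util.ArrayList;
import java.util.List;

final class RowBufferSinkSketch {
  private final List<Object> buffer = new ArrayList<Object>();

  // Called once per forwarded row by the parent operator in this sketch.
  void processRow(Object row) {
    buffer.add(row);
  }

  // FetchTask-style consumer: hand back everything buffered so far and start over.
  List<Object> drain() {
    List<Object> out = new ArrayList<Object>(buffer);
    buffer.clear();
    return out;
  }
}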
diff --git ql/src/test/queries/clientpositive/nonmr_fetch.q ql/src/test/queries/clientpositive/nonmr_fetch.q
new file mode 100644
index 0000000..e961e93
--- /dev/null
+++ ql/src/test/queries/clientpositive/nonmr_fetch.q
@@ -0,0 +1,83 @@
+set hive.fetch.task.conversion=minimal;
+
+-- backward compatible (minimal)
+explain select * from src limit 10;
+select * from src limit 10;
+
+explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10;
+select * from srcpart where ds='2008-04-08' AND hr='11' limit 10;
+
+-- negative, select expression
+explain select key from src limit 10;
+select key from src limit 10;
+
+-- negative, filter on non-partition column
+explain select * from srcpart where key > 100 limit 10;
+select * from srcpart where key > 100 limit 10;
+
+-- negative, table sampling
+explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10;
+select * from src TABLESAMPLE (0.25 PERCENT) limit 10;
+
+set hive.fetch.task.conversion=more;
+
+-- backward compatible (more)
+explain select * from src limit 10;
+select * from src limit 10;
+
+explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10;
+select * from srcpart where ds='2008-04-08' AND hr='11' limit 10;
+
+-- select expression
+explain select cast(key as int) * 10, upper(value) from src limit 10;
+select cast(key as int) * 10, upper(value) from src limit 10;
+
+-- filter on non-partition column
+explain select key from src where key < 100 limit 10;
+select key from src where key < 100 limit 10;
+
+-- select expr for partitioned table
+explain select key from srcpart where ds='2008-04-08' AND hr='11' limit 10;
+select key from srcpart where ds='2008-04-08' AND hr='11' limit 10;
+
+-- virtual columns
+explain select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 10 limit 10;
+select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 100 limit 10;
+
+-- virtual columns on partitioned table
+explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30;
+select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30;
+
+-- bucket sampling
+explain select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key);
+select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key);
+explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key);
+select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key);
+
+-- split sampling
+explain select * from src TABLESAMPLE (0.25 PERCENT);
+select * from src TABLESAMPLE (0.25 PERCENT);
+explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT);
+select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT);
+
+-- non deterministic func
+explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1;
+select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1;
+
+-- negative, groupby
+explain select key, count(value) from src group by key;
+
+-- negative, distinct
+explain select distinct key, value from src;
+
+-- negative, CTAS
+explain create table srcx as select distinct key, value from src;
+
+-- negative, analyze
+explain analyze table src compute statistics;
+
+-- negative, subq
+explain select a.* from (select * from src) a;
+
+-- negative, join
+explain select * from src join src src2 on src.key=src2.key;
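The q file drives the feature through `set hive.fetch.task.conversion=...`; the same property can be set programmatically on a HiveConf. A small sketch, assuming nothing beyond the property name used above:

import org.apache.hadoop.hive.conf.HiveConf;

public class FetchConversionConfSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // HiveConf extends Hadoop's Configuration, so the plain String setter is enough here.
    conf.set("hive.fetch.task.conversion", "more");
    System.out.println(conf.get("hive.fetch.task.conversion"));
  }
}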
diff --git ql/src/test/results/clientpositive/nonmr_fetch.q.out ql/src/test/results/clientpositive/nonmr_fetch.q.out
new file mode 100644
index 0000000..ba87df5
--- /dev/null
+++ ql/src/test/results/clientpositive/nonmr_fetch.q.out
@@ -0,0 +1,1342 @@
+PREHOOK: query: -- backward compatible (minimal)
+explain select * from src limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- backward compatible (minimal)
+explain select * from src limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Limit
+ ListSink
+
+
+PREHOOK: query: select * from src limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238 val_238
+86 val_86
+311 val_311
+27 val_27
+165 val_165
+409 val_409
+255 val_255
+278 val_278
+98 val_98
+484 val_484
+PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ TableScan
+ alias: srcpart
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: ds
+ type: string
+ expr: hr
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Limit
+ ListSink
+
+
+PREHOOK: query: select * from srcpart where ds='2008-04-08' AND hr='11' limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from srcpart where ds='2008-04-08' AND hr='11' limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+238 val_238 2008-04-08 11
+86 val_86 2008-04-08 11
+311 val_311 2008-04-08 11
+27 val_27 2008-04-08 11
+165 val_165 2008-04-08 11
+409 val_409 2008-04-08 11
+255 val_255 2008-04-08 11
+278 val_278 2008-04-08 11
+98 val_98 2008-04-08 11
+484 val_484 2008-04-08 11
+PREHOOK: query: -- negative, select expression
+explain select key from src limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- negative, select expression
+explain select key from src limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ Limit
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+
+
+PREHOOK: query: select key from src limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key from src limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238
+86
+311
+27
+165
+409
+255
+278
+98
+484
+PREHOOK: query: -- negative, filter on non-partition column
+explain select * from srcpart where key > 100 limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- negative, filter on non-partition column
+explain select * from srcpart where key > 100 limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ srcpart
+ TableScan
+ alias: srcpart
+ Filter Operator
+ predicate:
+ expr: (key > 100.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: ds
+ type: string
+ expr: hr
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Limit
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+
+
+PREHOOK: query: select * from srcpart where key > 100 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select * from srcpart where key > 100 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+238 val_238 2008-04-08 11
+311 val_311 2008-04-08 11
+165 val_165 2008-04-08 11
+409 val_409 2008-04-08 11
+255 val_255 2008-04-08 11
+278 val_278 2008-04-08 11
+484 val_484 2008-04-08 11
+265 val_265 2008-04-08 11
+193 val_193 2008-04-08 11
+401 val_401 2008-04-08 11
+PREHOOK: query: -- negative, table sampling
+explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- negative, table sampling
+explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Limit
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ Percentage Sample:
+ src
+ percentage: 0.25
+ seed number: 0
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+
+
+PREHOOK: query: select * from src TABLESAMPLE (0.25 PERCENT) limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src TABLESAMPLE (0.25 PERCENT) limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238 val_238
+86 val_86
+311 val_311
+27 val_27
+165 val_165
+409 val_409
+255 val_255
+278 val_278
+98 val_98
+484 val_484
+PREHOOK: query: -- backward compatible (more)
+explain select * from src limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- backward compatible (more)
+explain select * from src limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Limit
+ ListSink
+
+
+PREHOOK: query: select * from src limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238 val_238
+86 val_86
+311 val_311
+27 val_27
+165 val_165
+409 val_409
+255 val_255
+278 val_278
+98 val_98
+484 val_484
+PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ TableScan
+ alias: srcpart
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: ds
+ type: string
+ expr: hr
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Limit
+ ListSink
+
+
+PREHOOK: query: select * from srcpart where ds='2008-04-08' AND hr='11' limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from srcpart where ds='2008-04-08' AND hr='11' limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+238 val_238 2008-04-08 11
+86 val_86 2008-04-08 11
+311 val_311 2008-04-08 11
+27 val_27 2008-04-08 11
+165 val_165 2008-04-08 11
+409 val_409 2008-04-08 11
+255 val_255 2008-04-08 11
+278 val_278 2008-04-08 11
+98 val_98 2008-04-08 11
+484 val_484 2008-04-08 11
+PREHOOK: query: -- select expression
+explain select cast(key as int) * 10, upper(value) from src limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- select expression
+explain select cast(key as int) * 10, upper(value) from src limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (* (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)) 10)) (TOK_SELEXPR (TOK_FUNCTION upper (TOK_TABLE_OR_COL value)))) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: (UDFToInteger(key) * 10)
+ type: int
+ expr: upper(value)
+ type: string
+ outputColumnNames: _col0, _col1
+ Limit
+ ListSink
+
+
+PREHOOK: query: select cast(key as int) * 10, upper(value) from src limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select cast(key as int) * 10, upper(value) from src limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+2380 VAL_238
+860 VAL_86
+3110 VAL_311
+270 VAL_27
+1650 VAL_165
+4090 VAL_409
+2550 VAL_255
+2780 VAL_278
+980 VAL_98
+4840 VAL_484
+PREHOOK: query: -- filter on non-partition column
+explain select key from src where key < 100 limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- filter on non-partition column
+explain select key from src where key < 100 limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ TableScan
+ alias: src
+ Filter Operator
+ predicate:
+ expr: (key < 100.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ Limit
+ ListSink
+
+
+PREHOOK: query: select key from src where key < 100 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key from src where key < 100 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86
+27
+98
+66
+37
+15
+82
+17
+0
+57
+PREHOOK: query: -- select expr for partitioned table
+explain select key from srcpart where ds='2008-04-08' AND hr='11' limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- select expr for partitioned table
+explain select key from srcpart where ds='2008-04-08' AND hr='11' limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ TableScan
+ alias: srcpart
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ Limit
+ ListSink
+
+
+PREHOOK: query: select key from srcpart where ds='2008-04-08' AND hr='11' limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select key from srcpart where ds='2008-04-08' AND hr='11' limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+238
+86
+311
+27
+165
+409
+255
+278
+98
+484
+PREHOOK: query: -- virtual columns
+explain select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 10 limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- virtual columns
+explain select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 10 limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ TableScan
+ alias: src
+ Filter Operator
+ predicate:
+ expr: (key < 10.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: BLOCK__OFFSET__INSIDE__FILE
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ Limit
+ ListSink
+
+
+PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 100 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 100 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86 val_86 12
+27 val_27 34
+98 val_98 92
+66 val_66 198
+37 val_37 328
+15 val_15 386
+82 val_82 396
+17 val_17 910
+0 val_0 968
+57 val_57 1024
+PREHOOK: query: -- virtual columns on partitioned table
+explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30
+PREHOOK: type: QUERY
+POSTHOOK: query: -- virtual columns on partitioned table
+explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_LIMIT 30)))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 30
+ Processor Tree:
+ TableScan
+ alias: srcpart
+ Filter Operator
+ predicate:
+ expr: (key < 10.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: ds
+ type: string
+ expr: hr
+ type: string
+ expr: BLOCK__OFFSET__INSIDE__FILE
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Limit
+ ListSink
+
+
+PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+0 val_0 2008-04-08 11 968
+4 val_4 2008-04-08 11 1218
+8 val_8 2008-04-08 11 1916
+0 val_0 2008-04-08 11 2088
+0 val_0 2008-04-08 11 2632
+5 val_5 2008-04-08 11 3060
+5 val_5 2008-04-08 11 3864
+2 val_2 2008-04-08 11 4004
+5 val_5 2008-04-08 11 4540
+9 val_9 2008-04-08 11 5398
+0 val_0 2008-04-08 12 968
+4 val_4 2008-04-08 12 1218
+8 val_8 2008-04-08 12 1916
+0 val_0 2008-04-08 12 2088
+0 val_0 2008-04-08 12 2632
+5 val_5 2008-04-08 12 3060
+5 val_5 2008-04-08 12 3864
+2 val_2 2008-04-08 12 4004
+5 val_5 2008-04-08 12 4540
+9 val_9 2008-04-08 12 5398
+0 val_0 2008-04-09 11 968
+4 val_4 2008-04-09 11 1218
+8 val_8 2008-04-09 11 1916
+0 val_0 2008-04-09 11 2088
+0 val_0 2008-04-09 11 2632
+5 val_5 2008-04-09 11 3060
+5 val_5 2008-04-09 11 3864
+2 val_2 2008-04-09 11 4004
+5 val_5 2008-04-09 11 4540
+9 val_9 2008-04-09 11 5398
+PREHOOK: query: -- bucket sampling
+explain select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- bucket sampling
+explain select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLEBUCKETSAMPLE 1 40 (TOK_TABLE_OR_COL key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE)))))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: src
+ Filter Operator
+ predicate:
+ expr: (((hash(key) & 2147483647) % 40) = 0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: BLOCK__OFFSET__INSIDE__FILE
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ ListSink
+
+
+PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+484 val_484 102
+286 val_286 1404
+187 val_187 1416
+187 val_187 2492
+77 val_77 2622
+187 val_187 4516
+448 val_448 5636
+PREHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) (TOK_TABLEBUCKETSAMPLE 1 40 (TOK_TABLE_OR_COL key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE)))))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: srcpart
+ Filter Operator
+ predicate:
+ expr: (((hash(key) & 2147483647) % 40) = 0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: ds
+ type: string
+ expr: hr
+ type: string
+ expr: BLOCK__OFFSET__INSIDE__FILE
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ ListSink
+
+
+PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (BUCKET 1 OUT OF 40 ON key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+484 val_484 2008-04-08 11 102
+286 val_286 2008-04-08 11 1404
+187 val_187 2008-04-08 11 1416
+187 val_187 2008-04-08 11 2492
+77 val_77 2008-04-08 11 2622
+187 val_187 2008-04-08 11 4516
+448 val_448 2008-04-08 11 5636
+484 val_484 2008-04-08 12 102
+286 val_286 2008-04-08 12 1404
+187 val_187 2008-04-08 12 1416
+187 val_187 2008-04-08 12 2492
+77 val_77 2008-04-08 12 2622
+187 val_187 2008-04-08 12 4516
+448 val_448 2008-04-08 12 5636
+484 val_484 2008-04-09 11 102
+286 val_286 2008-04-09 11 1404
+187 val_187 2008-04-09 11 1416
+187 val_187 2008-04-09 11 2492
+77 val_77 2008-04-09 11 2622
+187 val_187 2008-04-09 11 4516
+448 val_448 2008-04-09 11 5636
+484 val_484 2008-04-09 12 102
+286 val_286 2008-04-09 12 1404
+187 val_187 2008-04-09 12 1416
+187 val_187 2008-04-09 12 2492
+77 val_77 2008-04-09 12 2622
+187 val_187 2008-04-09 12 4516
+448 val_448 2008-04-09 12 5636
+PREHOOK: query: -- split sampling
+explain select * from src TABLESAMPLE (0.25 PERCENT)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- split sampling
+explain select * from src TABLESAMPLE (0.25 PERCENT)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ ListSink
+
+
+PREHOOK: query: select * from src TABLESAMPLE (0.25 PERCENT)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src TABLESAMPLE (0.25 PERCENT)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238 val_238
+86 val_86
+PREHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) (TOK_TABLESPLITSAMPLE 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE)))))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: srcpart
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: ds
+ type: string
+ expr: hr
+ type: string
+ expr: BLOCK__OFFSET__INSIDE__FILE
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ ListSink
+
+
+PREHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+238 val_238 2008-04-08 11 0
+86 val_86 2008-04-08 11 12
+238 val_238 2008-04-08 12 0
+86 val_86 2008-04-08 12 12
+238 val_238 2008-04-09 11 0
+86 val_86 2008-04-09 11 12
+238 val_238 2008-04-09 12 0
+86 val_86 2008-04-09 12 12
+PREHOOK: query: -- non deterministic func
+explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- non deterministic func
+explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) "2008-04-09") (> (TOK_FUNCTION rand) 1)))))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: srcpart
+ Filter Operator
+ predicate:
+ expr: (rand() > 1)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: BLOCK__OFFSET__INSIDE__FILE
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ ListSink
+
+
+PREHOOK: query: select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+PREHOOK: query: -- negative, groupby
+explain select key, count(value) from src group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- negative, groupby
+explain select key, count(value) from src group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(value)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- negative, distinct
+explain select distinct key, value from src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- negative, distinct
+explain select distinct key, value from src
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- negative, CTAS
+explain create table srcx as select distinct key, value from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: -- negative, CTAS
+explain create table srcx as select distinct key, value from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+ABSTRACT SYNTAX TREE:
+ (TOK_CREATETABLE (TOK_TABNAME srcx) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ name: default.srcx
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-3
+ Create Table Operator:
+ Create Table
+ columns: key string, value string
+ if not exists: false
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ # buckets: -1
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+ name: srcx
+ isExternal: false
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: -- negative, analyze
+explain analyze table src compute statistics
+PREHOOK: type: QUERY
+POSTHOOK: query: -- negative, analyze
+explain analyze table src compute statistics
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_ANALYZE (TOK_TAB (TOK_TABNAME src)))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+ Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-0
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+
+ Stage: Stage-1
+ Stats-Aggr Operator
+
+
+PREHOOK: query: -- negative, subq
+explain select a.* from (select * from src) a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- negative, subq
+explain select a.* from (select * from src) a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- negative, join
+explain select * from src join src src2 on src.key=src2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- negative, join
+explain select * from src join src src2 on src.key=src2.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src) src2) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ src2
+ TableScan
+ alias: src2
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
index b21755e..c798fe6 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
@@ -290,6 +290,17 @@ public final class ObjectInspectorFactory {
return result;
}
+ public static UnionStructObjectInspector repackUnionStructObjectInspector(
+ UnionStructObjectInspector unionInspector, List<StructObjectInspector> fieldsInspector) {
+ if (unionInspector == null) {
+ return getUnionStructObjectInspector(fieldsInspector);
+ }
+ cachedUnionStructObjectInspector.remove(fieldsInspector);
+ unionInspector.init(fieldsInspector);
+ cachedUnionStructObjectInspector.put(fieldsInspector, unionInspector);
+ return unionInspector;
+ }
+
static HashMap<ArrayList<Object>, ColumnarStructObjectInspector> cachedColumnarStructObjectInspector = new HashMap<ArrayList<Object>, ColumnarStructObjectInspector>();
public static ColumnarStructObjectInspector getColumnarStructObjectInspector(
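repackUnionStructObjectInspector re-initializes an already-cached inspector in place rather than allocating a new one, then refreshes the cache entry keyed by the field inspectors. The pattern, restated with plain Java types as a sketch (not the Hive classes):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class RepackingCacheSketch {
  interface Inspector {
    void init(List<String> fields);
  }

  private final Map<List<String>, Inspector> cache =
      new HashMap<List<String>, Inspector>();

  Inspector repack(Inspector existing, List<String> fields, Inspector fresh) {
    if (existing == null) {
      // Corresponds to falling back to the plain get...ObjectInspector(fields) path.
      cache.put(fields, fresh);
      return fresh;
    }
    cache.remove(fields);      // drop any stale entry for this field list
    existing.init(fields);     // re-initialize the shared instance in place
    cache.put(fields, existing);
    return existing;
  }
}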