diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java index 6c80a14..1e70602 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java @@ -17,6 +17,13 @@ */ package org.apache.hadoop.hive.ql.exec; +import java.io.IOException; +import java.io.Serializable; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.Future; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -28,11 +35,6 @@ import org.apache.hadoop.hive.ql.plan.FileMergeDesc; import org.apache.hadoop.mapred.JobConf; -import java.io.IOException; -import java.io.Serializable; -import java.util.HashSet; -import java.util.Set; - /** * Fast file merge operator for ORC and RCfile. This is an abstract class which * does not process any rows. Refer {@link org.apache.hadoop.hive.ql.exec.OrcFileMergeOperator} @@ -63,8 +65,8 @@ protected transient DynamicPartitionCtx dpCtx; @Override - public void initializeOp(Configuration hconf) throws HiveException { - super.initializeOp(hconf); + public Collection<Future<?>> initializeOp(Configuration hconf) throws HiveException { + Collection<Future<?>> result = super.initializeOp(hconf); this.jc = new JobConf(hconf); incompatFileSet = new HashSet<Path>(); autoDelete = false; @@ -92,6 +94,7 @@ public void initializeOp(Configuration hconf) throws HiveException { throw new HiveException("Failed to initialize AbstractFileMergeOperator", e); } + return result; } // sets up temp and task temp path diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java index ca17039..f948861 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java @@ -20,7 +20,9 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.persistence.RowContainer; @@ -57,7 +59,7 @@ public AbstractMapJoinOperator(AbstractMapJoinOperator<? extends MapJoinDesc> mj @Override @SuppressWarnings("unchecked") - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection<Future<?>> initializeOp(Configuration hconf) throws HiveException { if (conf.getGenJoinKeys()) { int tagLen = conf.getTagLength(); joinKeys = new List[tagLen]; @@ -66,7 +68,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { inputObjInspectors,NOTSKIPBIGTABLE, tagLen); } - super.initializeOp(hconf); + Collection<Future<?>> result = super.initializeOp(hconf); numMapRowsRead = 0; @@ -81,7 +83,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { !hasFilter(posBigTable), reporter); storage[posBigTable] = bigPosRC; - initializeChildren(hconf); + return result; } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java index caa4528..dd2be03 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AppMasterEventOperator.java @@ -20,7 +20,9 @@ import java.io.IOException;
import java.nio.ByteBuffer; +import java.util.Collection; import java.util.Collections; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; @@ -50,11 +52,14 @@ protected transient long MAX_SIZE; @Override - public void initializeOp(Configuration hconf) throws HiveException { + public Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); + MAX_SIZE = HiveConf.getLongVar(hconf, ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_MAX_EVENT_SIZE); serializer = (Serializer) ReflectionUtils.newInstance(conf.getTable().getDeserializerClass(), null); initDataBuffer(false); + return result; } protected void initDataBuffer(boolean skipPruning) throws HiveException { @@ -71,7 +76,7 @@ protected void initDataBuffer(boolean skipPruning) throws HiveException { } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { if (hasReachedMaxSize) { return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java index aa45b7d..a7fbfe7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CollectOperator.java @@ -20,11 +20,13 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.plan.CollectDesc; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -41,16 +43,17 @@ transient int maxSize; @Override - protected void initializeOp(Configuration hconf) throws HiveException { - super.initializeOp(hconf); + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); rowList = new ArrayList(); maxSize = conf.getBufferSize().intValue(); + return result; } boolean firstRow = true; @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { ObjectInspector rowInspector = inputObjInspectors[tag]; if (firstRow) { firstRow = false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java index e2f696e..7cea5be 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java @@ -368,7 +368,7 @@ public int execute(DriverContext driverContext) { return persistPartitionStats(); } } catch (Exception e) { - LOG.info(e); + LOG.error("Failed to run column stats task", e); } return 1; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java index 1d5ebb1..79fa1b3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java @@ -21,9 +21,11 @@ import java.io.Serializable; import 
java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -185,7 +187,8 @@ public CommonJoinOperator(CommonJoinOperator clone) { @Override @SuppressWarnings("unchecked") - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); this.handleSkewJoin = conf.getHandleSkewJoin(); this.hconf = hconf; @@ -319,6 +322,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { if (isLogInfoEnabled) { LOG.info("JOIN " + outputObjInspector.getTypeName() + " totalsz = " + totalSz); } + return result; } transient boolean newGroupStarted = false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java index 99e9b99..22fb7f1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java @@ -20,9 +20,11 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -85,10 +87,10 @@ public CommonMergeJoinOperator() { @SuppressWarnings("unchecked") @Override - public void initializeOp(Configuration hconf) throws HiveException { - super.initializeOp(hconf); + public Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); firstFetchHappened = false; - initializeChildren(hconf); + int maxAlias = 0; for (byte pos = 0; pos < order.length; pos++) { if (pos > maxAlias) { @@ -132,6 +134,7 @@ public void initializeOp(Configuration hconf) throws HiveException { } sources = ((TezContext) MapredContext.get()).getRecordSources(); + return result; } @Override @@ -155,7 +158,7 @@ public void startGroup() throws HiveException { * push but the rest is pulled until we run out of records. 
*/ @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { posBigTable = (byte) conf.getBigTablePosition(); byte alias = (byte) tag; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java index f6f0be3..48b3139 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java @@ -21,9 +21,11 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -108,7 +110,8 @@ private int[][] newChildOperatorsTag; @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); // A DemuxOperator should have at least one child if (childOperatorsArray.length == 0) { throw new HiveException( @@ -180,7 +183,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { if (isLogInfoEnabled) { LOG.info("newChildOperatorsTag " + Arrays.toString(newChildOperatorsTag)); } - initializeChildren(hconf); + return result; } private int[] toArray(List list) { @@ -253,7 +256,7 @@ protected void initializeChildren(Configuration hconf) throws HiveException { } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { int currentChildIndex = newTagToChildIndex[tag]; // Check if we start to forward rows to a new child. @@ -277,7 +280,7 @@ public void processOp(Object row, int tag) throws HiveException { if (child.getDone()) { childrenDone++; } else { - child.processOp(row, oldTag); + child.process(row, oldTag); } // if all children are done, this operator is also done diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java index 2b15c83..6a68059 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; +import java.util.Collection; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -72,13 +74,14 @@ public DummyStoreOperator() { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> ret = super.initializeOp(hconf); /* - * The conversion to standard object inspector was necessitated by HIVE-5973. The issue - * happens when a select operator preceeds this operator as in the case of a subquery. The - * select operator does not allocate a new object to hold the deserialized row. This affects + * The conversion to standard object inspector was necessitated by HIVE-5973. The issue + * happens when a select operator preceeds this operator as in the case of a subquery. The + * select operator does not allocate a new object to hold the deserialized row. 
This affects * the operation of the SMB join which puts the object in a priority queue. Since all elements - * of the priority queue point to the same object, the join was resulting in incorrect + * of the priority queue point to the same object, the join was resulting in incorrect * results. * * So the fix is to make a copy of the object as done in the processOp phase below. This @@ -87,11 +90,11 @@ protected void initializeOp(Configuration hconf) throws HiveException { */ outputObjInspector = ObjectInspectorUtils.getStandardObjectInspector(inputObjInspectors[0]); result = new InspectableObject(null, outputObjInspector); - initializeChildren(hconf); + return ret; } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { // Store the row. See comments above for why we need a new copy of the row. result.o = ObjectInspectorUtils.copyToStandardObject(row, inputObjInspectors[0], ObjectInspectorCopyOption.WRITABLE); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index dbd6e60..b049efd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -29,7 +29,6 @@ import java.util.Properties; import org.apache.commons.lang3.StringEscapeUtils; -import com.google.common.collect.Iterators; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configurable; @@ -74,6 +73,8 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hive.common.util.AnnotationUtils; +import com.google.common.collect.Iterators; + /** * FetchTask implementation. 
**/ @@ -93,7 +94,7 @@ private final boolean isPartitioned; private final boolean isNonNativeTable; private StructObjectInspector vcsOI; - private List vcCols; + private final List vcCols; private ExecMapperContext context; private transient Deserializer tableSerDe; @@ -178,7 +179,7 @@ private ExecMapperContext setupExecContext(Operator operator, List paths) if (hasVC || work.getSplitSample() != null) { context = new ExecMapperContext(job); if (operator != null) { - operator.setExecContext(context); + operator.passExecContext(context); } } setFetchOperatorContext(job, paths); @@ -203,7 +204,7 @@ static InputFormat getInputFormatFromCache(Class inputFor JobConf conf) throws IOException { if (Configurable.class.isAssignableFrom(inputFormatClass) || JobConfigurable.class.isAssignableFrom(inputFormatClass)) { - return (InputFormat) ReflectionUtils + return ReflectionUtils .newInstance(inputFormatClass, conf); } InputFormat format = inputFormats.get(inputFormatClass.getName()); @@ -406,7 +407,7 @@ public boolean doNext(WritableComparable key, Writable value) throws IOException public boolean pushRow() throws IOException, HiveException { if (work.getRowsComputedUsingStats() != null) { for (List row : work.getRowsComputedUsingStats()) { - operator.processOp(row, 0); + operator.process(row, 0); } flushRow(); return true; @@ -421,7 +422,7 @@ public boolean pushRow() throws IOException, HiveException { } protected void pushRow(InspectableObject row) throws HiveException { - operator.processOp(row.o, 0); + operator.process(row.o, 0); } protected void flushRow() throws HiveException { @@ -656,7 +657,7 @@ private boolean needConversion(TableDesc tableDesc, List partDesc // what's different is that this is evaluated by unit of row using RecordReader.getPos() // and that is evaluated by unit of split using InputSplit.getLength(). private long shrinkedLength = -1; - private InputFormat inputFormat; + private final InputFormat inputFormat; public FetchInputFormatSplit(InputSplit split, InputFormat inputFormat) { super(split, inputFormat.getClass().getName()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 810aaa6..8b2749c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -18,16 +18,20 @@ package org.apache.hadoop.hive.ql.exec; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_TEMPORARY_TABLE_STORAGE; + import java.io.FileNotFoundException; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -76,8 +80,6 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.ReflectionUtils; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_TEMPORARY_TABLE_STORAGE; - /** * File Sink operator implementation. 
**/ @@ -319,7 +321,8 @@ private void initializeSpecPath() { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); try { this.hconf = hconf; filesCreated = false; @@ -425,14 +428,13 @@ protected void initializeOp(Configuration hconf) throws HiveException { } statsMap.put(Counter.RECORDS_OUT + "_" + suffix, row_count); - - initializeChildren(hconf); } catch (HiveException e) { throw e; } catch (Exception e) { e.printStackTrace(); throw new HiveException(e); } + return result; } /** @@ -630,7 +632,7 @@ protected boolean updateProgress() { protected Writable recordValue; @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { /* Create list bucketing sub-directory only if stored-as-directories is on. */ String lbDirName = null; lbDirName = (lbCtx == null) ? null : generateListBucketingDirName(row); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java index 1840865..65301c0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FilterOperator.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; +import java.util.Collection; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; @@ -28,7 +30,6 @@ import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.io.LongWritable; /** * Filter operator implementation. 
@@ -49,7 +50,8 @@ public FilterOperator() { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); try { heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT); @@ -63,11 +65,11 @@ protected void initializeOp(Configuration hconf) throws HiveException { } catch (Throwable e) { throw new HiveException(e); } - initializeChildren(hconf); + return result; } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { ObjectInspector rowInspector = inputObjInspectors[tag]; if (conditionInspector == null) { conditionInspector = (PrimitiveObjectInspector) conditionEvaluator diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java index f007943..6cd8c80 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java @@ -19,7 +19,10 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; +import java.util.Collection; +import java.util.concurrent.Future; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ForwardDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; @@ -32,7 +35,7 @@ private static final long serialVersionUID = 1L; @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { forward(row, inputObjInspectors[tag]); } @@ -57,4 +60,9 @@ public String getName() { static public String getOperatorName() { return "FOR"; } + + @Override + protected Collection> initializeOp(Configuration hconf) throws HiveException { + return super.initializeOp(hconf); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index dfee3a5..9867739 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -24,12 +24,14 @@ import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.Future; import javolution.util.FastBitSet; @@ -178,7 +180,8 @@ public static FastBitSet groupingSet2BitSet(int value) { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); numRowsInput = 0; numRowsHashTbl = 0; @@ -390,7 +393,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { memoryMXBean = ManagementFactory.getMemoryMXBean(); maxMemory = memoryMXBean.getHeapMemoryUsage().getMax(); memoryThreshold = this.getConf().getMemoryThreshold(); - initializeChildren(hconf); + return result; } /** @@ -700,7 +703,7 @@ private void processKey(Object row, } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { firstRow = false; ObjectInspector 
rowInspector = inputObjInspectors[tag]; // Total number of input rows is needed for hash aggregation only diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java index 91b2369..2829a9d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; +import java.util.Collection; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -31,21 +33,22 @@ private static final long serialVersionUID = 1L; @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); TableDesc tbl = this.getConf().getTbl(); try { Deserializer serde = tbl.getDeserializerClass().newInstance(); SerDeUtils.initializeSerDe(serde, hconf, tbl.getProperties(), null); this.outputObjInspector = serde.getObjectInspector(); - initializeChildren(hconf); } catch (Exception e) { LOG.error("Generating output obj inspector from dummy object error", e); e.printStackTrace(); } + return result; } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { throw new HiveException(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java index 7602740..c3e3078 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java @@ -19,11 +19,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; /** * HashTableLoader is an interface used by MapJoinOperator used to load the hashtables @@ -31,8 +29,9 @@ */ public interface HashTableLoader { - void init(ExecMapperContext context, Configuration hconf, MapJoinOperator joinOp); + void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf, + MapJoinOperator joinOp); - void load(MapJoinTableContainer[] mapJoinTables, - MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage) throws HiveException; + void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes, + long memUsage) throws HiveException; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java index 80ce96a..96283cd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java @@ -22,7 +22,9 @@ import java.io.ObjectOutputStream; import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import 
org.apache.commons.logging.LogFactory; @@ -33,10 +35,10 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionHandler; import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinEagerRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKeyObject; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinPersistableTableContainer; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinEagerRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; @@ -114,7 +116,8 @@ public HashTableSinkOperator(MapJoinOperator mjop) { @Override @SuppressWarnings("unchecked") - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT); console = new LogHelper(LOG, isSilent); memoryExhaustionHandler = new MapJoinMemoryExhaustionHandler(console, conf.getHashtableMemoryUsage()); @@ -189,6 +192,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { } catch (SerDeException e) { throw new HiveException(e); } + return result; } public MapJoinTableContainer[] getMapJoinTables() { @@ -219,7 +223,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { * This operator only process small tables Read the key/value pairs Load them into hashtable */ @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { byte alias = (byte)tag; // compute keys and values as StandardObjects. Use non-optimized key (MR). Object[] currentKey = new Object[joinKeys[alias].size()]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java index f49cb2a..a5f4e5b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java @@ -20,7 +20,9 @@ import java.io.IOException; import java.io.Serializable; +import java.util.Collection; import java.util.List; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.hadoop.conf.Configuration; @@ -38,8 +40,7 @@ /** * Join operator implementation. 
*/ -public class JoinOperator extends CommonJoinOperator implements - Serializable { +public class JoinOperator extends CommonJoinOperator implements Serializable { private static final long serialVersionUID = 1L; private transient SkewJoinHandler skewJoinKeyContext = null; @@ -55,19 +56,19 @@ private final transient LongWritable skewjoin_followup_jobs = new LongWritable(0); @Override - protected void initializeOp(Configuration hconf) throws HiveException { - super.initializeOp(hconf); - initializeChildren(hconf); + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); if (handleSkewJoin) { skewJoinKeyContext = new SkewJoinHandler(this); skewJoinKeyContext.initiliaze(hconf); skewJoinKeyContext.setSkewJoinJobCounter(skewjoin_followup_jobs); } statsMap.put(SkewkeyTableCounter.SKEWJOINFOLLOWUPJOBS.toString(), skewjoin_followup_jobs); + return result; } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { try { reportProgress(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewForwardOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewForwardOperator.java index 4e430f2..e1479c0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewForwardOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewForwardOperator.java @@ -18,6 +18,10 @@ package org.apache.hadoop.hive.ql.exec; +import java.util.Collection; +import java.util.concurrent.Future; + +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; @@ -33,7 +37,7 @@ private static final long serialVersionUID = 1L; @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { forward(row, inputObjInspectors[tag]); } @@ -50,4 +54,9 @@ static public String getOperatorName() { public OperatorType getType() { return OperatorType.LATERALVIEWFORWARD; } + + @Override + protected Collection> initializeOp(Configuration hconf) throws HiveException { + return super.initializeOp(hconf); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java index 8735d8c..15b8387 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java @@ -19,7 +19,9 @@ package org.apache.hadoop.hive.ql.exec; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -82,7 +84,8 @@ public static final byte UDTF_TAG = 1; @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); ArrayList ois = new ArrayList(); ArrayList fieldNames = conf.getOutputInternalColNames(); @@ -104,9 +107,8 @@ protected void initializeOp(Configuration hconf) throws HiveException { outputObjInspector = ObjectInspectorFactory .getStandardStructObjectInspector(fieldNames, ois); + return result; - // Initialize the rest of the 
operator DAG - super.initializeOp(hconf); } // acc is short for accumulator. It's used to build the row before forwarding @@ -121,7 +123,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { * by all the corresponding rows from the UDTF operator. And so on. */ @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; if (tag == SELECT_TAG) { selectObjs.clear(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java index 276902a..86519a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; +import java.util.Collection; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -37,16 +39,17 @@ protected transient boolean isMap; @Override - protected void initializeOp(Configuration hconf) throws HiveException { - super.initializeOp(hconf); + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); limit = conf.getLimit(); leastRow = conf.getLeastRows(); currCount = 0; isMap = hconf.getBoolean("mapred.task.is.map", true); + return result; } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { if (currCount < limit) { forward(row, inputObjInspectors[tag]); currCount++; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java index 25797c6..87917dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java @@ -18,11 +18,12 @@ package org.apache.hadoop.hive.ql.exec; +import java.util.Collection; import java.util.List; import java.util.Properties; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ListSinkDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; @@ -43,13 +44,14 @@ private transient int numRows; @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); try { fetcher = initializeFetcher(hconf); } catch (Exception e) { throw new HiveException(e); } - super.initializeOp(hconf); + return result; } private FetchFormatter initializeFetcher(Configuration conf) throws Exception { @@ -81,8 +83,9 @@ public int getNumRows() { return numRows; } + @Override @SuppressWarnings("unchecked") - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { try { res.add(fetcher.convert(row, inputObjInspectors[0])); numRows++; @@ -91,6 +94,7 @@ public void processOp(Object row, int tag) throws HiveException { } } + @Override public OperatorType getType() { return OperatorType.FORWARD; } diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index 30731b3..39b026b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -20,17 +20,21 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; import java.util.List; import java.util.concurrent.Callable; +import java.util.concurrent.Future; -import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.HashTableLoaderFactory; import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionHandler; +import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer; @@ -94,9 +98,13 @@ public void startGroup() throws HiveException { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { unwrapContainer = new UnwrapRowContainer[conf.getTagLength()]; - super.initializeOp(hconf); + + Collection> result = super.initializeOp(hconf); + if (result == null) { + result = new HashSet>(); + } int tagLen = conf.getTagLength(); @@ -116,6 +124,9 @@ protected void initializeOp(Configuration hconf) throws HiveException { generateMapMetaData(); + final ExecMapperContext mapContext = getExecContext(); + final MapredContext mrContext = MapredContext.get(); + if (!conf.isBucketMapJoin()) { /* * The issue with caching in case of bucket map join is that different tasks @@ -126,27 +137,42 @@ protected void initializeOp(Configuration hconf) throws HiveException { * also ability to schedule tasks to re-use containers that have cached the specific bucket. 
*/ if (isLogInfoEnabled) { - LOG.info("This is not bucket map join, so cache"); + LOG.info("This is not bucket map join, so cache"); } - Pair pair = - (Pair) - cache.retrieve(cacheKey, new Callable() { - public Object call() throws HiveException { - return loadHashTable(); - } - }); + Future> future = + cache.retrieveAsync( + cacheKey, + new Callable>() { + @Override + public Pair call() + throws HiveException { + return loadHashTable(mapContext, mrContext); + } + }); + result.add(future); + } else if (mapContext.getLocalWork() == null + || mapContext.getLocalWork().getInputFileChangeSensitive() == false) { + loadHashTable(mapContext, mrContext); + hashTblInitedOnce = true; + } + return result; + } + @SuppressWarnings("unchecked") + @Override + protected final void completeInitializationOp(Object[] os) throws HiveException { + if (os.length != 0) { + Pair pair = + (Pair) os[0]; mapJoinTables = pair.getLeft(); mapJoinTableSerdes = pair.getRight(); hashTblInitedOnce = true; - } else { - loadHashTable(); } if (this.getExecContext() != null) { // reset exec context so that initialization of the map operator happens - // poperly + // properly this.getExecContext().setLastInputPath(null); this.getExecContext().setCurrentInputPath(null); } @@ -182,45 +208,44 @@ public void generateMapMetaData() throws HiveException { try { TableDesc keyTableDesc = conf.getKeyTblDesc(); SerDe keySerializer = (SerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), - null); + null); SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null); MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false); for (int pos = 0; pos < order.length; pos++) { - if (pos == posBigTable) { - continue; - } - TableDesc valueTableDesc; - if (conf.getNoOuterJoin()) { - valueTableDesc = conf.getValueTblDescs().get(pos); - } else { - valueTableDesc = conf.getValueFilteredTblDescs().get(pos); - } - SerDe valueSerDe = (SerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), - null); - SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); - MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)); - mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, valueContext); + if (pos == posBigTable) { + continue; + } + TableDesc valueTableDesc; + if (conf.getNoOuterJoin()) { + valueTableDesc = conf.getValueTblDescs().get(pos); + } else { + valueTableDesc = conf.getValueFilteredTblDescs().get(pos); + } + SerDe valueSerDe = + (SerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null); + SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); + MapJoinObjectSerDeContext valueContext = + new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)); + mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, valueContext); } } catch (SerDeException e) { throw new HiveException(e); } } - private Pair - loadHashTable() throws HiveException { + private Pair loadHashTable( + ExecMapperContext mapContext, MapredContext mrContext) throws HiveException { if (this.hashTblInitedOnce - && ((this.getExecContext() == null) - || (this.getExecContext().getLocalWork() == null) - || (this.getExecContext().getLocalWork().getInputFileChangeSensitive() - == false))) { + && ((mapContext == null) || (mapContext.getLocalWork() == null) || (mapContext + .getLocalWork().getInputFileChangeSensitive() == false))) { // no need to reload - return 
new ImmutablePair (mapJoinTables, mapJoinTableSerdes); + return new ImmutablePair( + mapJoinTables, mapJoinTableSerdes); } perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.LOAD_HASHTABLE); - loader.init(getExecContext(), hconf, this); + loader.init(mapContext, mrContext, hconf, this); long memUsage = (long)(MapJoinMemoryExhaustionHandler.getMaxHeapSize() * conf.getHashTableMemoryUsage()); loader.load(mapJoinTables, mapJoinTableSerdes, memUsage); @@ -239,7 +264,7 @@ public void generateMapMetaData() throws HiveException { // Load the hash table @Override public void cleanUpInputFileChangedOp() throws HiveException { - loadHashTable(); + loadHashTable(getExecContext(), MapredContext.get()); } protected void setMapJoinKey( @@ -260,7 +285,7 @@ protected MapJoinKey getRefKey(byte alias) { } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { try { alias = (byte) tag; if (hashMapRowGetters == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index 6d66c74..dc7d821 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -21,6 +21,7 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; @@ -29,8 +30,8 @@ import java.util.Map.Entry; import java.util.Properties; import java.util.Set; +import java.util.concurrent.Future; -import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; @@ -62,6 +63,8 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.StringUtils; +import com.google.common.annotations.VisibleForTesting; + /** * Map operator. This triggers overall map side processing. 
This is a little * different from regular operators in that it starts off by processing a @@ -154,7 +157,7 @@ public boolean forward(Object row) throws HiveException { if (op.getDone()) { return false; } - op.processOp(row, 0); + op.process(row, 0); return true; } } @@ -172,8 +175,8 @@ public boolean forward(Object row) throws HiveException { void initializeAsRoot(JobConf hconf, MapWork mapWork) throws Exception { setConf(mapWork); setChildren(hconf); - setExecContext(new ExecMapperContext(hconf)); - initialize(hconf, null); + passExecContext(new ExecMapperContext(hconf)); + initializeMapOperator(hconf); } private MapOpCtx initObjectInspector(Configuration hconf, MapOpCtx opCtx, @@ -413,7 +416,11 @@ private String getNominalPath(Path fpath) { } @Override - public void initializeOp(Configuration hconf) throws HiveException { + public Collection> initializeOp(Configuration hconf) throws HiveException { + return super.initializeOp(hconf); + } + + public void initializeMapOperator(Configuration hconf) throws HiveException { // set that parent initialization is done and call initialize on children state = State.INIT; statsMap.put(Counter.DESERIALIZE_ERRORS.toString(), deserialize_error_count); @@ -604,7 +611,7 @@ else if(vc.equals(VirtualColumn.ROWID)) { } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { throw new HiveException("Hive 2 Internal error: should not be called!"); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java index ce705c2..4eca2d8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java @@ -52,6 +52,7 @@ public static MapredContext init(boolean isMap, JobConf jobConf) { HiveConf.getVar(jobConf, ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ? 
new TezContext(isMap, jobConf) : new MapredContext(isMap, jobConf); contexts.set(context); + logger.info("MapredContext initialized."); return context; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java index 5969050..eb4dff3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java @@ -21,7 +21,9 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -169,7 +171,9 @@ public Object process(Object row) throws HiveException { private transient long[] nextCntrs; @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); + // A MuxOperator should only have a single child if (childOperatorsArray.length != 1) { throw new HiveException( @@ -204,7 +208,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { cntrs[i] = 0; nextCntrs[i] = 1; } - initializeChildren(hconf); + return result; } /** @@ -230,7 +234,7 @@ protected void initializeChildren(Configuration hconf) throws HiveException { } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { if (isLogInfoEnabled) { cntrs[tag]++; if (cntrs[tag] == nextCntrs[tag]) { @@ -247,11 +251,11 @@ public void processOp(Object row, int tag) throws HiveException { } else { if (forward[tag]) { // No need to evaluate, just forward it. - child.processOp(row, tag); + child.process(row, tag); } else { // Call the corresponding handler to evaluate this row and // forward the result - child.processOp(handlers[tag].process(row), handlers[tag].getTag()); + child.process(handlers[tag].process(row), handlers[tag].getTag()); } } } @@ -269,7 +273,7 @@ public void forward(Object row, ObjectInspector rowInspector) // we cannot pass new tag to this method which is used to get // the old tag from the mapping of newTagToOldTag, we bypass // this method in MuxOperator and directly call process on children - // in processOp() method.. + // in process() method.. } @Override @@ -308,7 +312,7 @@ public void processGroup(int tag) throws HiveException { protected void closeOp(boolean abort) throws HiveException { if (isLogInfoEnabled) { for (int i = 0; i < numParents; i++) { - LOG.info(id + ", tag=" + i + ", forwarded " + cntrs[i] + " rows"); + LOG.info(id + ", tag=" + i + ", forwarded " + cntrs[i] + " rows"); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java index b7d6261..f0df2d3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec; import java.util.concurrent.Callable; +import java.util.concurrent.Future; + import org.apache.hadoop.hive.ql.metadata.HiveException; /** @@ -32,9 +34,23 @@ /** * Retrieve object from cache. + * + * @param + * @param key + * @param fn + * function to generate the object if it's not there + * @return the last cached object with the key, null if none. 
+ */ + public T retrieve(String key, Callable fn) throws HiveException; + + /** + * Retrieve object from cache asynchronously. + * + * @param * @param key - * @param fn function to generate the object if it's not there + * @param fn + * function to generate the object if it's not there * @return the last cached object with the key, null if none. */ - public Object retrieve(String key, Callable fn) throws HiveException; + public Future retrieveAsync(String key, Callable fn) throws HiveException; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index c491a47..a51c352 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; @@ -68,6 +69,7 @@ protected List> parentOperators; protected String operatorId; private transient ExecMapperContext execContext; + private transient boolean rootInitializeCalled = false; private static AtomicInteger seqId; @@ -101,13 +103,13 @@ // dummy operator (for not increasing seqId) private Operator(String name) { id = name; + initOperatorId(); + childOperators = new ArrayList>(); + parentOperators = new ArrayList>(); } public Operator() { - id = String.valueOf(seqId.getAndIncrement()); - childOperators = new ArrayList>(); - parentOperators = new ArrayList>(); - initOperatorId(); + this(String.valueOf(seqId.getAndIncrement())); } public static void resetId() { @@ -252,11 +254,6 @@ public String getIdentifier() { public void setReporter(Reporter rep) { reporter = rep; - // the collector is same across all operators - if (childOperators == null) { - return; - } - for (Operator op : childOperators) { op.setReporter(rep); } @@ -266,11 +263,6 @@ public void setReporter(Reporter rep) { public void setOutputCollector(OutputCollector out) { this.out = out; - // the collector is same across all operators - if (childOperators == null) { - return; - } - for (Operator op : childOperators) { op.setOutputCollector(out); } @@ -282,10 +274,6 @@ public void setOutputCollector(OutputCollector out) { public void setAlias(String alias) { this.alias = alias; - if (childOperators == null) { - return; - } - for (Operator op : childOperators) { op.setAlias(alias); } @@ -306,9 +294,6 @@ public void setAlias(String alias) { * otherwise */ protected boolean areAllParentsInitialized() { - if (parentOperators == null) { - return true; - } for (Operator parent : parentOperators) { if (parent == null) { //return true; @@ -332,7 +317,7 @@ protected boolean areAllParentsInitialized() { * @throws HiveException */ @SuppressWarnings("unchecked") - public void initialize(Configuration hconf, ObjectInspector[] inputOIs) + public final void initialize(Configuration hconf, ObjectInspector[] inputOIs) throws HiveException { if (state == State.INIT) { return; @@ -344,7 +329,7 @@ public void initialize(Configuration hconf, ObjectInspector[] inputOIs) } if (isLogInfoEnabled) { - LOG.info("Initializing Self " + this); + LOG.info("Initializing operator " + this); } if (inputOIs != null) { @@ -352,50 +337,69 @@ public void initialize(Configuration hconf, ObjectInspector[] inputOIs) } // initialize structure to maintain child op info. 
operator tree changes - // while - // initializing so this need to be done here instead of initialize() method - if (childOperators != null && !childOperators.isEmpty()) { - childOperatorsArray = new Operator[childOperators.size()]; - for (int i = 0; i < childOperatorsArray.length; i++) { - childOperatorsArray[i] = childOperators.get(i); - } - childOperatorsTag = new int[childOperatorsArray.length]; - for (int i = 0; i < childOperatorsArray.length; i++) { - List> parentOperators = childOperatorsArray[i] - .getParentOperators(); - if (parentOperators == null) { - throw new HiveException("Hive internal error: parent is null in " - + childOperatorsArray[i].getClass() + "!"); - } - childOperatorsTag[i] = parentOperators.indexOf(this); - if (childOperatorsTag[i] == -1) { - throw new HiveException( - "Hive internal error: cannot find parent in the child operator!"); - } + // while initializing so this need to be done here instead of constructor + childOperatorsArray = new Operator[childOperators.size()]; + for (int i = 0; i < childOperatorsArray.length; i++) { + childOperatorsArray[i] = childOperators.get(i); + } + childOperatorsTag = new int[childOperatorsArray.length]; + for (int i = 0; i < childOperatorsArray.length; i++) { + List> parentOperators = + childOperatorsArray[i].getParentOperators(); + childOperatorsTag[i] = parentOperators.indexOf(this); + if (childOperatorsTag[i] == -1) { + throw new HiveException("Hive internal error: cannot find parent in the child operator!"); } } if (inputObjInspectors.length == 0) { throw new HiveException("Internal Error during operator initialization."); } + // derived classes can set this to different object if needed outputObjInspector = inputObjInspectors[0]; - //pass the exec context to child operators - passExecContext(this.execContext); + Collection> asyncInitOperations = initializeOp(hconf); - initializeOp(hconf); - - // sanity check - if (childOperatorsArray == null - && !(childOperators == null || childOperators.isEmpty())) { - throw new HiveException( - "Internal Hive error during operator initialization."); + // sanity checks + if (!rootInitializeCalled + || asyncInitOperations == null + || childOperatorsArray.length != childOperators.size()) { + throw new AssertionError("Internal error during operator initialization"); } if (isLogInfoEnabled) { LOG.info("Initialization Done " + id + " " + getName()); } + + initializeChildren(hconf); + + // let's wait on the async ops before continuing + completeInitialization(asyncInitOperations); + } + + private void completeInitialization(Collection> fs) throws HiveException { + Object[] os = new Object[fs.size()]; + int i = 0; + for (Future f : fs) { + try { + os[i++] = f.get(); + } catch (Exception e) { + throw new HiveException(e); + } + } + completeInitializationOp(os); + } + + /** + * This metod can be used to retrieve the results from async operations + * started at init time - before the operator pipeline is started. + * + * @param os + * @throws HiveException + */ + protected void completeInitializationOp(Object[] os) throws HiveException { + // no-op default } public void initializeLocalWork(Configuration hconf) throws HiveException { @@ -410,8 +414,9 @@ public void initializeLocalWork(Configuration hconf) throws HiveException { /** * Operator specific initialization. 
*/ - protected void initializeOp(Configuration hconf) throws HiveException { - initializeChildren(hconf); + protected Collection> initializeOp(Configuration hconf) throws HiveException { + rootInitializeCalled = true; + return new ArrayList>(); } /** @@ -430,8 +435,7 @@ protected void initializeChildren(Configuration hconf) throws HiveException { LOG.info("Initializing children of " + id + " " + getName()); } for (int i = 0; i < childOperatorsArray.length; i++) { - childOperatorsArray[i].initialize(hconf, outputObjInspector, - childOperatorsTag[i]); + childOperatorsArray[i].initialize(hconf, outputObjInspector, childOperatorsTag[i]); if (reporter != null) { childOperatorsArray[i].setReporter(reporter); } @@ -443,10 +447,8 @@ protected void initializeChildren(Configuration hconf) throws HiveException { */ public void passExecContext(ExecMapperContext execContext) { this.setExecContext(execContext); - if(childOperators != null) { - for (int i = 0; i < childOperators.size(); i++) { + for (int i = 0; i < childOperators.size(); i++) { childOperators.get(i).passExecContext(execContext); - } } } @@ -501,7 +503,7 @@ public ObjectInspector getOutputObjInspector() { * Rows with the same tag should have exactly the same rowInspector * all the time. */ - public abstract void processOp(Object row, int tag) throws HiveException; + public abstract void process(Object row, int tag) throws HiveException; protected final void defaultStartGroup() throws HiveException { if (isLogDebugEnabled) { @@ -598,7 +600,7 @@ public void close(boolean abort) throws HiveException { // check if all parents are finished if (!allInitializedParentsAreClosed()) { if (isLogDebugEnabled) { - LOG.debug("Not all parent operators are closed. Not closing."); + LOG.debug("Not all parent operators are closed. Not closing."); } return; } @@ -822,7 +824,7 @@ protected long getNextCntr(long cntr) { protected void forward(Object row, ObjectInspector rowInspector) throws HiveException { - if ((childOperatorsArray == null) || (getDone())) { + if (getDone()) { return; } @@ -832,12 +834,12 @@ protected void forward(Object row, ObjectInspector rowInspector) if (o.getDone()) { childrenDone++; } else { - o.processOp(row, childOperatorsTag[i]); + o.process(row, childOperatorsTag[i]); } } // if all children are done, this operator is also done - if (childrenDone == childOperatorsArray.length) { + if (childrenDone != 0 && childrenDone == childOperatorsArray.length) { setDone(true); } } @@ -878,7 +880,7 @@ public void preorderMap(OperatorFunc opFunc) { public void logStats() { if (isLogInfoEnabled) { for (String e : statsMap.keySet()) { - LOG.info(e.toString() + ":" + statsMap.get(e).toString()); + LOG.info(e.toString() + ":" + statsMap.get(e).toString()); } } } @@ -969,7 +971,7 @@ public String dump(int level, HashSet seenOpts) { * Initialize an array of ExprNodeEvaluator and return the result * ObjectInspectors. */ - protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator[] evals, + protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator[] evals, ObjectInspector rowInspector) throws HiveException { ObjectInspector[] result = new ObjectInspector[evals.length]; for (int i = 0; i < evals.length; i++) { @@ -982,7 +984,7 @@ public String dump(int level, HashSet seenOpts) { * Initialize an array of ExprNodeEvaluator from start, for specified length * and return the result ObjectInspectors. 
*/ - protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator[] evals, + protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator[] evals, int start, int length, ObjectInspector rowInspector) throws HiveException { ObjectInspector[] result = new ObjectInspector[length]; @@ -997,7 +999,7 @@ public String dump(int level, HashSet seenOpts) { * StructObjectInspector with integer field names. */ protected static StructObjectInspector initEvaluatorsAndReturnStruct( - ExprNodeEvaluator[] evals, List outputColName, + ExprNodeEvaluator[] evals, List outputColName, ObjectInspector rowInspector) throws HiveException { ObjectInspector[] fieldObjectInspectors = initEvaluators(evals, rowInspector); @@ -1059,12 +1061,6 @@ public ExecMapperContext getExecContext() { public void setExecContext(ExecMapperContext execContext) { this.execContext = execContext; - if(this.childOperators != null) { - for (int i = 0; i op = this.childOperators.get(i); - op.setExecContext(execContext); - } - } } // The input file has changed - every operator can invoke specific action @@ -1128,6 +1124,7 @@ public boolean supportSkewJoinOptimization() { * @return Cloned operator * @throws CloneNotSupportedException */ + @SuppressWarnings("unchecked") public Operator cloneOp() throws CloneNotSupportedException { T descClone = (T) conf.clone(); Operator ret = @@ -1148,11 +1145,6 @@ public boolean supportSkewJoinOptimization() { throws CloneNotSupportedException { Operator newOp = this.cloneOp(); newOp.setParentOperators(this.parentOperators); - // Fix parent in all children - if (this.getChildOperators() == null) { - newOp.setChildOperators(null); - return newOp; - } List> newChildren = new ArrayList>(); @@ -1301,12 +1293,13 @@ public Statistics getStatistics() { if (conf != null) { return conf.getStatistics(); } + return null; } public OpTraits getOpTraits() { if (conf != null) { - return conf.getOpTraits(); + return conf.getTraits(); } return null; @@ -1314,36 +1307,48 @@ public OpTraits getOpTraits() { public void setOpTraits(OpTraits metaInfo) { if (isLogDebugEnabled) { - LOG.debug("Setting traits ("+metaInfo+") on "+this); + LOG.debug("Setting traits (" + metaInfo + ") on " + this); } if (conf != null) { - conf.setOpTraits(metaInfo); + conf.setTraits(metaInfo); } else { - LOG.warn("Cannot set traits when there's no descriptor: "+this); + LOG.warn("Cannot set traits when there's no descriptor: " + this); } } public void setStatistics(Statistics stats) { if (isLogDebugEnabled) { - LOG.debug("Setting stats ("+stats+") on "+this); + LOG.debug("Setting stats (" + stats + ") on " + this); } if (conf != null) { conf.setStatistics(stats); } else { - LOG.warn("Cannot set stats when there's no descriptor: "+this); + LOG.warn("Cannot set stats when there's no descriptor: " + this); } } + @SuppressWarnings("rawtypes") public static Operator createDummy() { return new DummyOperator(); } + @SuppressWarnings({ "serial", "unchecked", "rawtypes" }) private static class DummyOperator extends Operator { public DummyOperator() { super("dummy"); } + @Override - public void processOp(Object row, int tag) { } + public void process(Object row, int tag) { + } + @Override - public OperatorType getType() { return null; } + public OperatorType getType() { + return null; + } + + @Override + protected Collection> initializeOp(Configuration conf) { + return childOperators; + } } public Map getTagToOperatorTree() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index b82fcb2..91e8a02 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -18,6 +18,12 @@ package org.apache.hadoop.hive.ql.exec; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.vector.VectorAppMasterEventOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator; @@ -62,16 +68,13 @@ import org.apache.hadoop.hive.ql.plan.UDTFDesc; import org.apache.hadoop.hive.ql.plan.UnionDesc; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - /** * OperatorFactory. * */ @SuppressWarnings({ "rawtypes", "unchecked" }) public final class OperatorFactory { + protected static transient final Log LOG = LogFactory.getLog(OperatorFactory.class); private static final List opvec; private static final List vectorOpvec; @@ -227,9 +230,6 @@ public static void makeChild( // Add this parent to the children for (Operator op : oplist) { List> parents = op.getParentOperators(); - if (parents == null) { - parents = new ArrayList>(); - } parents.add(ret); op.setParentOperators(parents); } @@ -259,9 +259,6 @@ public static void makeChild( // Add the new operator as child of each of the passed in operators for (Operator op : oplist) { List children = op.getChildOperators(); - if (children == null) { - children = new ArrayList(); - } children.add(ret); op.setChildOperators(children); } @@ -286,17 +283,13 @@ public static void makeChild( Operator ret = get((Class) conf.getClass()); ret.setConf(conf); if (oplist.size() == 0) { - return (ret); + return ret; } // Add the new operator as child of each of the passed in operators for (Operator op : oplist) { List children = op.getChildOperators(); - if (children == null) { - children = new ArrayList(); - } children.add(ret); - op.setChildOperators(children); } // add parents for the newly created operator @@ -308,7 +301,7 @@ public static void makeChild( ret.setParentOperators(parent); - return (ret); + return ret; } /** @@ -318,7 +311,7 @@ public static void makeChild( RowSchema rwsch, Operator... 
oplist) { Operator ret = getAndMakeChild(conf, oplist); ret.setSchema(rwsch); - return (ret); + return ret; } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java index 2ff884b..f00fc77 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java @@ -122,7 +122,7 @@ public static void setChildrenCollector(List> c if(op.getName().equals(ReduceSinkOperator.getOperatorName())) { ReduceSinkOperator rs = ((ReduceSinkOperator)op); if (outMap.containsKey(rs.getConf().getOutputName())) { - LOG.info("Setting output collector: " + rs + " --> " + LOG.info("Setting output collector: " + rs + " --> " + rs.getConf().getOutputName()); rs.setOutputCollector(outMap.get(rs.getConf().getOutputName())); } @@ -234,9 +234,7 @@ public static boolean sameRowSchema(Operator operator1, Operator operator2 resultMap.put(clazz, op); } } - if (op.getChildOperators() != null) { - allChildren.addAll(op.getChildOperators()); - } + allChildren.addAll(op.getChildOperators()); } ops = allChildren; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java index 0e2552b..866f7c0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java @@ -56,7 +56,7 @@ private FSDataInputStream fdis; @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { Object[] keyValue = (Object[]) row; processKeyValuePairs(keyValue[0], keyValue[1]); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java index e95505c..7d465d2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java @@ -19,12 +19,13 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; +import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Stack; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; @@ -66,7 +67,8 @@ * 4. 
Create input partition to store rows coming from previous operator */ @Override - protected void initializeOp(Configuration jobConf) throws HiveException { + protected Collection> initializeOp(Configuration jobConf) throws HiveException { + Collection> result = super.initializeOp(jobConf); hiveConf = jobConf; isMapOperator = conf.isMapSide(); @@ -84,8 +86,7 @@ protected void initializeOp(Configuration jobConf) throws HiveException { ptfInvocation = setupChain(); ptfInvocation.initializeStreaming(jobConf, isMapOperator); firstMapRow = true; - - super.initializeOp(jobConf); + return result; } @Override @@ -96,7 +97,7 @@ protected void closeOp(boolean abort) throws HiveException { } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { if (!isMapOperator ) { /* * checkif current row belongs to the current accumulated Partition: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/RCFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/RCFileMergeOperator.java index 37a1de9..8657688 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/RCFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/RCFileMergeOperator.java @@ -43,7 +43,7 @@ int columnNumber = 0; @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { Object[] keyValue = (Object[]) row; processKeyValuePairs(keyValue[0], keyValue[1]); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java index bc91529..ee86c2c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java @@ -18,12 +18,16 @@ package org.apache.hadoop.hive.ql.exec; +import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM; + import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Random; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; @@ -48,14 +52,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.io.BinaryComparable; import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.util.hash.MurmurHash; -import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM; - /** * Reduce Sink Operator sends output to the reduce stage. 
**/ @@ -153,7 +155,8 @@ private final transient LongWritable recordCounter = new LongWritable(); @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); try { numRows = 0; @@ -237,12 +240,12 @@ protected void initializeOp(Configuration hconf) throws HiveException { useUniformHash = conf.getReducerTraits().contains(UNIFORM); firstRow = true; - initializeChildren(hconf); } catch (Exception e) { String msg = "Error initializing ReduceSinkOperator: " + e.getMessage(); LOG.error(msg, e); throw new RuntimeException(e); } + return result; } @@ -291,7 +294,7 @@ protected static StructObjectInspector initEvaluatorsAndReturnStruct( @Override @SuppressWarnings("unchecked") - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { try { ObjectInspector rowInspector = inputObjInspectors[tag]; if (firstRow) { @@ -514,6 +517,7 @@ protected HiveKey toHiveKey(Object obj, int tag, Integer distLength) throws SerD return keyWritable; } + @Override public void collect(byte[] key, byte[] value, int hash) throws IOException { HiveKey keyWritable = new HiveKey(key, hash); BytesWritable valueWritable = new BytesWritable(value); @@ -608,6 +612,7 @@ public void setInputAliases(String[] inputAliases) { return inputAliases; } + @Override public void setOutputCollector(OutputCollector _out) { this.out = _out; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java index e682e45..ab17821 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java @@ -21,10 +21,12 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -91,7 +93,7 @@ public SMBMapJoinOperator(AbstractMapJoinOperator mapJoin } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { // If there is a sort-merge join followed by a regular join, the SMBJoinOperator may not // get initialized at all. Consider the following query: @@ -99,7 +101,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { // For the mapper processing C, The SMJ is not initialized, no need to close it either. 
initDone = true; - super.initializeOp(hconf); + Collection> result = super.initializeOp(hconf); closeCalled = false; @@ -154,6 +156,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { } foundNextKeyGroup[pos] = false; } + return result; } @Override @@ -195,7 +198,7 @@ public void initializeMapredLocalWork(MapJoinDesc mjConf, Configuration hconf, HiveInputFormat.pushFilters(jobClone, ts); - ts.setExecContext(getExecContext()); + ts.passExecContext(getExecContext()); FetchOperator fetchOp = new FetchOperator(fetchWork, jobClone); ts.initialize(jobClone, new ObjectInspector[]{fetchOp.getOutputObjectInspector()}); @@ -231,7 +234,7 @@ public void cleanUpInputFileChangedOp() throws HiveException { } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { if (tag == posBigTable) { if (inputFileChanged) { @@ -555,7 +558,7 @@ private void fetchOneRow(byte tag) { fetchDone[tag] = true; return; } - forwardOp.processOp(row.o, tag); + forwardOp.process(row.o, tag); // check if any operator had a fatal error or early exit during // execution if (forwardOp.getDone()) { @@ -803,7 +806,7 @@ private boolean next(Integer current) throws IOException, HiveException { // Pass the row though the operator tree. It is guaranteed that not more than 1 row can // be produced from a input row. - forwardOp.processOp(nextRow.o, 0); + forwardOp.process(nextRow.o, 0); nextRow = sinkOp.getResult(); // It is possible that the row got absorbed in the operator tree. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java index daff398..f2eed44 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java @@ -27,12 +27,14 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; import java.util.Timer; import java.util.TimerTask; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; @@ -259,7 +261,8 @@ public File getAbsolutePath(String filename) { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); firstRow = true; statsMap.put(Counter.DESERIALIZE_ERRORS.toString(), deserialize_error_count); @@ -280,11 +283,10 @@ protected void initializeOp(Configuration hconf) throws HiveException { outputObjInspector = scriptOutputDeserializer.getObjectInspector(); - // initialize all children before starting the script - initializeChildren(hconf); } catch (Exception e) { throw new HiveException(ErrorMsg.SCRIPT_INIT_ERROR.getErrorCodedMsg(), e); } + return result; } boolean isBrokenPipeException(IOException e) { @@ -321,7 +323,7 @@ public void setInputContext(String inputPath, String tableName, String partition } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { // initialize the user's process only when you receive the first row if (firstRow) { firstRow = false; @@ -573,6 +575,7 @@ public OutputStreamProcessor(ObjectInspector rowInspector) { this.rowInspector = rowInspector; } + @Override 
public void processLine(Writable line) throws HiveException { try { row = scriptOutputDeserializer.deserialize(line); @@ -583,6 +586,7 @@ public void processLine(Writable line) throws HiveException { forward(row, rowInspector); } + @Override public void close() { } } @@ -651,6 +655,7 @@ public ErrorStreamProcessor(int maxBytes) { } } + @Override public void processLine(Writable line) throws HiveException { String stringLine = line.toString(); @@ -693,6 +698,7 @@ public void processLine(Writable line) throws HiveException { bytesCopied += len; } + @Override public void close() { } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java index 1642926..cd7fb92 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java @@ -19,7 +19,9 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; +import java.util.Collection; import java.util.List; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; @@ -42,12 +44,12 @@ private transient boolean isSelectStarNoCompute = false; @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); // Just forward the row as is if (conf.isSelStarNoCompute()) { - initializeChildren(hconf); isSelectStarNoCompute = true; - return; + return result; } List colList = conf.getColList(); eval = new ExprNodeEvaluator[colList.size()]; @@ -64,11 +66,11 @@ protected void initializeOp(Configuration hconf) throws HiveException { } outputObjInspector = initEvaluatorsAndReturnStruct(eval, conf.getOutputColumnNames(), inputObjInspectors[0]); - initializeChildren(hconf); + return result; } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { if (isSelectStarNoCompute) { forward(row, inputObjInspectors[tag]); return; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java index 9557efd..39ffda5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java @@ -22,7 +22,9 @@ import java.io.ObjectOutputStream; import java.io.OutputStream; import java.io.Serializable; +import java.util.Collection; import java.util.Set; +import java.util.concurrent.Future; import org.apache.commons.io.FileExistsException; import org.apache.commons.logging.Log; @@ -50,7 +52,7 @@ private final PerfLogger perfLogger = PerfLogger.getPerfLogger(); protected static final Log LOG = LogFactory.getLog(SparkHashTableSinkOperator.class.getName()); - private HashTableSinkOperator htsOperator; + private final HashTableSinkOperator htsOperator; // The position of this table private byte tag; @@ -60,18 +62,20 @@ public SparkHashTableSinkOperator() { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); ObjectInspector[] inputOIs = new ObjectInspector[conf.getTagLength()]; inputOIs[tag] = inputObjInspectors[0]; conf.setTagOrder(new Byte[]{ tag }); 
htsOperator.setConf(conf); htsOperator.initialize(hconf, inputOIs); + return result; } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { // Ignore the tag passed in, which should be 0, not what we want - htsOperator.processOp(row, this.tag); + htsOperator.process(row, this.tag); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java index cc5fe5e..cbf02e9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java @@ -20,9 +20,11 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -84,7 +86,7 @@ public void setTableDesc(TableDesc tableDesc) { * operator will be enhanced to read the table. **/ @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { if (rowLimit >= 0 && currCount++ >= rowLimit) { setDone(true); return; @@ -153,9 +155,9 @@ private void gatherStats(Object row) { values.add(o == null ? defaultPartitionName : o.toString()); } partitionSpecs = FileUtils.makePartName(conf.getPartColumns(), values); - if (isLogInfoEnabled) { - LOG.info("Stats Gathering found a new partition spec = " + partitionSpecs); - } + if (isLogInfoEnabled) { + LOG.info("Stats Gathering found a new partition spec = " + partitionSpecs); + } } // find which column contains the raw data size (both partitioned and non partitioned int uSizeColumn = -1; @@ -191,16 +193,17 @@ private void gatherStats(Object row) { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { - initializeChildren(hconf); + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); inputFileChanged = false; if (conf == null) { - return; + return result; } + rowLimit = conf.getRowLimit(); if (!conf.isGatherStats()) { - return; + return result; } this.hconf = hconf; @@ -216,9 +219,9 @@ protected void initializeOp(Configuration hconf) throws HiveException { stats = new HashMap(); if (conf.getPartColumns() == null || conf.getPartColumns().size() == 0) { // NON PARTITIONED table - return; + return result; } - + return result; } @Override @@ -282,7 +285,7 @@ private void publishStats() throws HiveException { if (!statsPublisher.connect(jc)) { // just return, stats gathering should not block the main query. if (isLogInfoEnabled) { - LOG.info("StatsPublishing error: cannot connect to database."); + LOG.info("StatsPublishing error: cannot connect to database."); } if (isStatsReliable) { throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TezDummyStoreOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TezDummyStoreOperator.java index 6a2d268..6bd156b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TezDummyStoreOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TezDummyStoreOperator.java @@ -33,8 +33,8 @@ * the records. 
*/ @Override - public void processOp(Object row, int tag) throws HiveException { - super.processOp(row, tag); + public void process(Object row, int tag) throws HiveException { + super.process(row, tag); forward(result.o, outputObjInspector); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java index 5501459..94cecd0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java @@ -20,7 +20,9 @@ import java.io.Serializable; import java.util.Arrays; +import java.util.Collection; import java.util.List; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; @@ -57,7 +59,8 @@ transient AutoProgressor autoProgressor; @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); genericUDTF = conf.getGenericUDTF(); collector = new UDTFCollector(this); @@ -90,13 +93,11 @@ protected void initializeOp(Configuration hconf) throws HiveException { hconf, HiveConf.ConfVars.HIVES_AUTO_PROGRESS_TIMEOUT, TimeUnit.MILLISECONDS)); autoProgressor.go(); } - - // Initialize the rest of the operator DAG - super.initializeOp(hconf); + return result; } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { // The UDTF expects arguments in an object[] StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; List fields = soi.getAllStructFieldRefs(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java index 0fe176b..9bbaadd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java @@ -20,7 +20,9 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -54,7 +56,8 @@ * needsTransform[]. 
*/ @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); int parents = parentOperators.size(); parentObjInspectors = new StructObjectInspector[parents]; @@ -116,11 +119,11 @@ protected void initializeOp(Configuration hconf) throws HiveException { + "] from " + inputObjInspectors[p] + " to " + outputObjInspector); } } - initializeChildren(hconf); + return result; } @Override - public synchronized void processOp(Object row, int tag) throws HiveException { + public synchronized void process(Object row, int tag) throws HiveException { StructObjectInspector soi = parentObjInspectors[tag]; List fields = parentFields[tag]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java index 4f37316..5999265 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java @@ -24,9 +24,9 @@ import java.util.List; import java.util.Map; -import org.apache.hadoop.conf.Configuration; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.MapOperator; import org.apache.hadoop.hive.ql.exec.MapredContext; @@ -105,6 +105,7 @@ public void configure(JobConf job) { } mo.setConf(mrwork); // initialize map operator + mo.initialize(job, null); mo.setChildren(job); l4j.info(mo.dump(0)); // initialize map local work @@ -113,9 +114,9 @@ public void configure(JobConf job) { MapredContext.init(true, new JobConf(jc)); - mo.setExecContext(execContext); + mo.passExecContext(execContext); mo.initializeLocalWork(jc); - mo.initialize(jc, null); + mo.initializeMapOperator(jc); if (localWork == null) { return; @@ -126,7 +127,7 @@ public void configure(JobConf job) { l4j.info("Initializing dummy operator"); List> dummyOps = localWork.getDummyParentOp(); for (Operator dummyOp : dummyOps){ - dummyOp.setExecContext(execContext); + dummyOp.passExecContext(execContext); dummyOp.initialize(jc,null); } } catch (Throwable e) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java index 42c5d22..f586d3b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java @@ -232,7 +232,7 @@ public void reduce(Object key, Iterator values, OutputCollector output, row.add(valueObject[tag]); try { - reducer.processOp(row, tag); + reducer.process(row, tag); } catch (Exception e) { String rowString = null; try { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java index 9581b72..96a6728 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java @@ -32,11 +32,10 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator; import org.apache.hadoop.hive.ql.exec.Utilities; 
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKeyObject; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -62,7 +61,8 @@ private MapJoinDesc desc; @Override - public void init(ExecMapperContext context, Configuration hconf, MapJoinOperator joinOp) { + public void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf, + MapJoinOperator joinOp) { this.context = context; this.hconf = hconf; this.joinOp = joinOp; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java index 3c86359..a5c1463 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java @@ -404,7 +404,7 @@ private void startForward(boolean inputFileChangeSenstive, String bigTableBucket if (row == null) { break; } - forwardOp.processOp(row.o, 0); + forwardOp.process(row.o, 0); } forwardOp.flush(); } @@ -445,7 +445,7 @@ private void initializeOperators(Map fetchOpJobConfMap) Operator forwardOp = work.getAliasToWork().get(alias); // put the exe context into all the operators - forwardOp.setExecContext(execContext); + forwardOp.passExecContext(execContext); // All the operators need to be initialized before process FetchOperator fetchOp = entry.getValue(); JobConf jobConf = fetchOpJobConfMap.get(fetchOp); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java index 398f7a5..a6f698d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java @@ -19,6 +19,10 @@ package org.apache.hadoop.hive.ql.exec.mr; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -32,18 +36,59 @@ public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache { private static final Log LOG = LogFactory.getLog(ObjectCache.class.getName()); + private static final boolean isInfoEnabled = LOG.isInfoEnabled(); @Override public void release(String key) { // nothing to do + if (isInfoEnabled) { + LOG.info(key + " no longer needed"); + } } @Override - public Object retrieve(String key, Callable fn) throws HiveException { + public T retrieve(String key, Callable fn) throws HiveException { try { + if (isInfoEnabled) { + LOG.info("Creating " + key); + } return fn.call(); } catch (Exception e) { throw new HiveException(e); } } + + @Override + public Future retrieveAsync(String key, Callable fn) throws HiveException { + final T value = retrieve(key, fn); + + return new Future() { + + @Override + public boolean cancel(boolean mayInterruptIfRunning) { + return false; + } + + @Override + public boolean isCancelled() { + return false; + } + + @Override + public boolean isDone() { + return true; + } + + @Override + public T get() throws InterruptedException, ExecutionException { + return value; + } + + @Override + public T get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, + 
TimeoutException { + return value; + } + }; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java index 129e97b..fe108c4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -58,7 +59,8 @@ private MapJoinDesc desc; @Override - public void init(ExecMapperContext context, Configuration hconf, MapJoinOperator joinOp) { + public void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf, + MapJoinOperator joinOp) { this.context = context; this.hconf = hconf; this.joinOp = joinOp; @@ -66,9 +68,9 @@ public void init(ExecMapperContext context, Configuration hconf, MapJoinOperator } @Override - public void load( - MapJoinTableContainer[] mapJoinTables, - MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage) throws HiveException { + public void load(MapJoinTableContainer[] mapJoinTables, + MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage) + throws HiveException { // Note: it's possible that a MJ operator is in a ReduceWork, in which case the // currentInputPath will be null. But, since currentInputPath is only interesting diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java index 40e339b..c7c8146 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java @@ -18,6 +18,10 @@ package org.apache.hadoop.hive.ql.exec.spark; +import java.io.IOException; +import java.util.Iterator; +import java.util.List; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.MapOperator; @@ -29,7 +33,6 @@ import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator; import org.apache.hadoop.hive.ql.log.PerfLogger; -import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -38,10 +41,6 @@ import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; -import java.io.IOException; -import java.util.Iterator; -import java.util.List; - /** * Clone from ExecMapper. 
SparkMapRecordHandler is the bridge between the spark framework and @@ -61,6 +60,7 @@ private boolean isLogInfoEnabled = false; private ExecMapperContext execContext; + @Override public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS); super.init(job, output, reporter); @@ -81,6 +81,7 @@ mo.setConf(mrwork); // initialize map operator + mo.initialize(jc, null); mo.setChildren(job); LOG.info(mo.dump(0)); // initialize map local work @@ -90,9 +91,9 @@ MapredContext.init(true, new JobConf(jc)); MapredContext.get().setReporter(reporter); - mo.setExecContext(execContext); + mo.passExecContext(execContext); mo.initializeLocalWork(jc); - mo.initialize(jc, null); + mo.initializeMapOperator(jc); OperatorUtils.setChildrenCollector(mo.getChildOperators(), output); mo.setReporter(rp); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMergeFileRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMergeFileRecordHandler.java index 9777065..fdc8452 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMergeFileRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMergeFileRecordHandler.java @@ -93,7 +93,7 @@ public void processRow(Object key, Object value) throws IOException { row[0] = key; row[1] = value; try { - mergeOp.processOp(row, 0); + mergeOp.process(row, 0); } catch (HiveException e) { abort = true; throw new IOException(e); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java index 2eab7bd..d4c1973 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java @@ -103,6 +103,7 @@ private List[] valueStringWriters; private MapredLocalWork localWork = null; + @Override @SuppressWarnings("unchecked") public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS); @@ -132,7 +133,7 @@ public void init(JobConf job, OutputCollector output, Reporter reporter) throws keyStructInspector = (StructObjectInspector) keyObjectInspector; batches = new VectorizedRowBatch[maxTags]; valueStructInspectors = new StructObjectInspector[maxTags]; - valueStringWriters = (List[]) new List[maxTags]; + valueStringWriters = new List[maxTags]; keysColumnOffset = keyStructInspector.getAllStructFieldRefs().size(); buffer = new DataOutputBuffer(); } @@ -196,7 +197,7 @@ public void init(JobConf job, OutputCollector output, Reporter reporter) throws localWork = gWork.getMapRedLocalWork(); execContext.setJc(jc); execContext.setLocalWork(localWork); - reducer.setExecContext(execContext); + reducer.passExecContext(execContext); reducer.setReporter(rp); OperatorUtils.setChildrenCollector( @@ -318,7 +319,7 @@ public void processRow(Object key, Object value) throws IOException { logMemoryInfo(); } try { - reducer.processOp(row, tag); + reducer.process(row, tag); } catch (Exception e) { String rowString = null; try { @@ -360,7 +361,7 @@ public void processRow(Object key, Object value) throws IOException { rowIdx++; if (rowIdx >= BATCH_SIZE) { VectorizedBatchUtil.setBatchSize(batch, rowIdx); - reducer.processOp(batch, tag); + reducer.process(batch, tag); rowIdx = 0; if (isLogInfoEnabled) { logMemoryInfo(); @@ -369,7 +370,7 @@ public 
void processRow(Object key, Object value) throws IOException { } if (rowIdx > 0) { VectorizedBatchUtil.setBatchSize(batch, rowIdx); - reducer.processOp(batch, tag); + reducer.process(batch, tag); } if (isLogInfoEnabled) { logMemoryInfo(); @@ -401,6 +402,7 @@ private Object deserializeValue(BytesWritable valueWritable, byte tag) throws Hi } } + @Override public void close() { // No row was processed diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java index 7402ba3..adb7a92 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.tez; import java.io.IOException; +import java.util.Collections; import java.util.Map; import org.apache.commons.logging.Log; @@ -26,11 +27,9 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.MapredContext; -import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionHandler; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; @@ -43,6 +42,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.Writable; +import org.apache.tez.runtime.api.Input; import org.apache.tez.runtime.api.LogicalInput; import org.apache.tez.runtime.library.api.KeyValueReader; @@ -54,30 +54,29 @@ private static final Log LOG = LogFactory.getLog(HashTableLoader.class.getName()); - private ExecMapperContext context; private Configuration hconf; private MapJoinDesc desc; + private TezContext tezContext; @Override - public void init(ExecMapperContext context, Configuration hconf, MapJoinOperator joinOp) { - this.context = context; + public void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf, + MapJoinOperator joinOp) { + this.tezContext = (TezContext) mrContext; this.hconf = hconf; this.desc = joinOp.getConf(); } @Override - public void load( - MapJoinTableContainer[] mapJoinTables, - MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage) throws HiveException { + public void load(MapJoinTableContainer[] mapJoinTables, + MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage) + throws HiveException { - TezContext tezContext = (TezContext) MapredContext.get(); Map parentToInput = desc.getParentToInput(); Map parentKeyCounts = desc.getParentKeyCounts(); boolean useOptimizedTables = HiveConf.getBoolVar( hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE); boolean isFirstKey = true; - TezCacheAccess tezCacheAccess = TezCacheAccess.createInstance(hconf); for (int pos = 0; pos < mapJoinTables.length; pos++) { if (pos == desc.getPosBigTable()) { continue; @@ -87,6 +86,14 @@ public void load( LogicalInput input = tezContext.getInput(inputName); try { + input.start(); + tezContext.getTezProcessorContext().waitForAnyInputReady( + Collections. 
singletonList(input)); + } catch (Exception e) { + throw new HiveException(e); + } + + try { KeyValueReader kvReader = (KeyValueReader) input.getReader(); MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext(), valCtx = mapJoinTableSerdes[pos].getValueContext(); @@ -122,14 +129,6 @@ public void load( } catch (Exception e) { throw new HiveException(e); } - // Register that the Input has been cached. - LOG.info("Is this a bucket map join: " + desc.isBucketMapJoin()); - // cache is disabled for bucket map join because of the same reason - // given in loadHashTable in MapJoinOperator. - if (!desc.isBucketMapJoin()) { - tezCacheAccess.registerCachedInput(inputName); - LOG.info("Setting Input: " + inputName + " as cached"); - } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java index 411ab67..d509c35 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java @@ -22,9 +22,11 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import java.util.TreeMap; import java.util.concurrent.Callable; @@ -55,6 +57,7 @@ import org.apache.tez.mapreduce.input.MRInputLegacy; import org.apache.tez.mapreduce.input.MultiMRInput; import org.apache.tez.mapreduce.processor.MRTaskReporter; +import org.apache.tez.runtime.api.Input; import org.apache.tez.runtime.api.LogicalInput; import org.apache.tez.runtime.api.LogicalOutput; import org.apache.tez.runtime.api.ProcessorContext; @@ -98,6 +101,7 @@ public MapRecordProcessor(final JobConf jconf) throws Exception { // create map and fetch operators mapWork = (MapWork) cache.retrieve(key, new Callable() { + @Override public Object call() { return Utilities.getMapWork(jconf); } @@ -119,6 +123,7 @@ public Object call() { mergeWorkList.add( (MapWork) cache.retrieve(key, new Callable() { + @Override public Object call() { return Utilities.getMergeWork(jconf, prefix); } @@ -133,6 +138,10 @@ void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrRep perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); super.init(jconf, processorContext, mrReporter, inputs, outputs); + MapredContext.init(true, new JobConf(jconf)); + ((TezContext) MapredContext.get()).setInputs(inputs); + ((TezContext) MapredContext.get()).setTezProcessorContext(processorContext); + // Update JobConf using MRInput, info like filename comes via this legacyMRInput = getMRInput(inputs); if (legacyMRInput != null) { @@ -160,6 +169,8 @@ void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrRep mapOp = new MapOperator(); } + mapOp.setExecContext(execContext); + connectOps.clear(); if (mergeWorkList != null) { MapOperator mergeMapOp = null; @@ -176,12 +187,13 @@ void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrRep mergeMapOp.setConf(mergeMapWork); l4j.info("Input name is " + mergeMapWork.getName()); jconf.set(Utilities.INPUT_NAME, mergeMapWork.getName()); + mergeMapOp.initialize(jconf, null); mergeMapOp.setChildren(jconf); DummyStoreOperator dummyOp = getJoinParentOp(mergeMapOp); connectOps.put(mergeMapWork.getTag(), dummyOp); - mergeMapOp.setExecContext(new ExecMapperContext(jconf)); + mergeMapOp.passExecContext(new ExecMapperContext(jconf)); 
mergeMapOp.initializeLocalWork(jconf); } } @@ -191,21 +203,18 @@ void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrRep mapOp.setConf(mapWork); l4j.info("Main input name is " + mapWork.getName()); jconf.set(Utilities.INPUT_NAME, mapWork.getName()); + mapOp.initialize(jconf, null); mapOp.setChildren(jconf); l4j.info(mapOp.dump(0)); - MapredContext.init(true, new JobConf(jconf)); - ((TezContext) MapredContext.get()).setInputs(inputs); - ((TezContext) MapredContext.get()).setTezProcessorContext(processorContext); - mapOp.setExecContext(execContext); mapOp.initializeLocalWork(jconf); initializeMapRecordSources(); - mapOp.initialize(jconf, null); + mapOp.initializeMapOperator(jconf); if ((mergeMapOpList != null) && mergeMapOpList.isEmpty() == false) { for (MapOperator mergeMapOp : mergeMapOpList) { jconf.set(Utilities.INPUT_NAME, mergeMapOp.getConf().getName()); - mergeMapOp.initialize(jconf, null); + mergeMapOp.initializeMapOperator(jconf); } } @@ -353,6 +362,17 @@ void close(){ private MRInputLegacy getMRInput(Map inputs) throws Exception { // there should be only one MRInput MRInputLegacy theMRInput = null; + + // start all mr/multi-mr inputs + Set li = new HashSet(); + for (LogicalInput inp: inputs.values()) { + if (inp instanceof MRInputLegacy || inp instanceof MultiMRInput) { + inp.start(); + li.add(inp); + } + } + processorContext.waitForAllInputsReady(li); + l4j.info("The input names are: " + Arrays.toString(inputs.keySet().toArray())); for (Entry inp : inputs.entrySet()) { if (inp.getValue() instanceof MRInputLegacy) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java index 5937872..8a8fe55 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java @@ -98,10 +98,11 @@ void init(final JobConf jconf, ProcessorContext processorContext, cacheKey = queryId + MAP_PLAN_KEY; MapWork mapWork = (MapWork) cache.retrieve(cacheKey, new Callable() { - public Object call() { - return Utilities.getMapWork(jconf); - } - }); + @Override + public Object call() { + return Utilities.getMapWork(jconf); + } + }); Utilities.setMapWork(jconf, mapWork); if (mapWork instanceof MergeFileWork) { @@ -116,7 +117,7 @@ public Object call() { MapredContext.init(true, new JobConf(jconf)); ((TezContext) MapredContext.get()).setInputs(inputs); - mergeOp.setExecContext(execContext); + mergeOp.passExecContext(execContext); mergeOp.initializeLocalWork(jconf); mergeOp.initialize(jconf, null); @@ -198,7 +199,7 @@ private boolean processRow(Object key, Object value) { } else { row[0] = key; row[1] = value; - mergeOp.processOp(row, 0); + mergeOp.process(row, 0); } } catch (Throwable e) { abort = true; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java index d444572..c0bcb21 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java @@ -19,11 +19,14 @@ package org.apache.hadoop.hive.ql.exec.tez; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.tez.runtime.api.ObjectRegistry; import 
org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.tez.runtime.api.ObjectRegistry; import com.google.common.base.Preconditions; @@ -41,6 +44,8 @@ // before anything else. private volatile static ObjectRegistry staticRegistry; + private static ExecutorService staticPool; + private final ObjectRegistry registry; public ObjectCache() { @@ -51,6 +56,7 @@ public ObjectCache() { public static void setupObjectRegistry(ObjectRegistry objectRegistry) { staticRegistry = objectRegistry; + staticPool = Executors.newCachedThreadPool(); } @Override @@ -59,21 +65,32 @@ public void release(String key) { LOG.info("Releasing key: " + key); } + @SuppressWarnings("unchecked") @Override - public Object retrieve(String key, Callable fn) throws HiveException { - Object o; + public T retrieve(String key, Callable fn) throws HiveException { + T value; try { - o = registry.get(key); - if (o == null) { - o = fn.call(); - LOG.info("Caching key: " + key); - registry.cacheForVertex(key, o); + value = (T) registry.get(key); + if (value == null) { + value = fn.call(); + LOG.info("Caching key: " + key); + registry.cacheForVertex(key, value); } else { - LOG.info("Found " + key + " in cache with value: " + o); + LOG.info("Found " + key + " in cache with value: " + value); } } catch (Exception e) { throw new HiveException(e); } - return o; + return value; + } + + @Override + public Future retrieveAsync(final String key, final Callable fn) throws HiveException { + return staticPool.submit(new Callable() { + @Override + public T call() throws Exception { + return retrieve(key, fn); + } + }); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index 071b144..63c63b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.tez; import java.util.ArrayList; +import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -85,6 +86,7 @@ void init(final JobConf jconf, ProcessorContext processorContext, String queryId = HiveConf.getVar(jconf, HiveConf.ConfVars.HIVEQUERYID); cacheKey = queryId + REDUCE_PLAN_KEY; redWork = (ReduceWork) cache.retrieve(cacheKey, new Callable() { + @Override public Object call() { return Utilities.getReduceWork(jconf); } @@ -103,9 +105,14 @@ public Object call() { for (int tag = 0; tag < redWork.getTagToValueDesc().size(); tag++) { TableDesc keyTableDesc = redWork.getKeyDesc(); TableDesc valueTableDesc = redWork.getTagToValueDesc().get(tag); - KeyValuesReader reader = - (KeyValuesReader) inputs.get(redWork.getTagToInput().get(tag)).getReader(); + // make the reader ready for prime time + Input input = inputs.get(redWork.getTagToInput().get(tag)); + input.start(); + processorContext.waitForAnyInputReady(Collections.singleton(input)); + KeyValuesReader reader = (KeyValuesReader) input.getReader(); + + // now we can setup the record source sources[tag] = new ReduceRecordSource(); sources[tag].init(jconf, reducer, redWork.getVectorMode(), keyTableDesc, valueTableDesc, reader, tag == position, (byte) tag, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index aa80510..1a43b72 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -325,7 +325,7 @@ public void next() throws HiveException { row.add(deserializeValue(valueWritable, tag)); try { - reducer.processOp(row, tag); + reducer.process(row, tag); } catch (Exception e) { String rowString = null; try { @@ -364,7 +364,7 @@ private void processVectors(Iterable values, byte tag) throws HiveExcept rowIdx++; if (rowIdx >= BATCH_SIZE) { VectorizedBatchUtil.setBatchSize(batch, rowIdx); - reducer.processOp(batch, tag); + reducer.process(batch, tag); // Reset just the value columns and value buffer. for (int i = keysColumnOffset; i < batch.numCols; i++) { @@ -377,7 +377,7 @@ private void processVectors(Iterable values, byte tag) throws HiveExcept if (rowIdx > 0) { // Flush final partial batch. VectorizedBatchUtil.setBatchSize(batch, rowIdx); - reducer.processOp(batch, tag); + reducer.process(batch, tag); } batch.reset(); keyBuffer.reset(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezCacheAccess.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezCacheAccess.java deleted file mode 100644 index c303ba8..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezCacheAccess.java +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.tez; - -import java.util.Collections; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.locks.ReentrantLock; -import java.util.concurrent.Callable; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.ObjectCache; -import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory; -import org.apache.hadoop.hive.ql.metadata.HiveException; - -/** - * Access to the Object cache from Tez, along with utility methods for accessing specific Keys. 
- */ -public class TezCacheAccess { - - private TezCacheAccess(ObjectCache cache, String qId) { - this.qId = qId; - this.cache = cache; - } - - private ObjectCache cache; - private String qId; - - public static TezCacheAccess createInstance(Configuration conf) { - ObjectCache cache = ObjectCacheFactory.getCache(conf); - String qId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID); - return new TezCacheAccess(cache, qId); - } - - private static final String CACHED_INPUT_KEY = "CACHED_INPUTS"; - - private final ReentrantLock cachedInputLock = new ReentrantLock(); - - private Set get() throws HiveException { - return (Set) cache.retrieve(CACHED_INPUT_KEY, - new Callable() { - public Object call() { - return Collections.newSetFromMap(new ConcurrentHashMap()); - } - }); - } - - public boolean isInputCached(String inputName) throws HiveException { - this.cachedInputLock.lock(); - try { - return get().contains(qId+inputName); - } finally { - this.cachedInputLock.unlock(); - } - } - - public void registerCachedInput(String inputName) throws HiveException { - this.cachedInputLock.lock(); - try { - get().add(qId+inputName); - } finally { - this.cachedInputLock.unlock(); - } - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java index 1e528a9..95bf8c9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java @@ -19,9 +19,9 @@ import java.io.IOException; import java.text.NumberFormat; -import java.util.Collections; import java.util.List; import java.util.Map; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -33,7 +33,6 @@ import org.apache.tez.mapreduce.processor.MRTaskReporter; import org.apache.tez.runtime.api.AbstractLogicalIOProcessor; import org.apache.tez.runtime.api.Event; -import org.apache.tez.runtime.api.Input; import org.apache.tez.runtime.api.LogicalInput; import org.apache.tez.runtime.api.LogicalOutput; import org.apache.tez.runtime.api.ProcessorContext; @@ -143,20 +142,6 @@ protected void initializeAndRunProcessor(Map inputs, throws Exception { Throwable originalThrowable = null; try { - // Outputs will be started later by the individual Processors. - TezCacheAccess cacheAccess = TezCacheAccess.createInstance(jobConf); - // Start the actual Inputs. After MRInput initialization. - for (Map.Entry inputEntry : inputs.entrySet()) { - if (!cacheAccess.isInputCached(inputEntry.getKey())) { - LOG.info("Starting input " + inputEntry.getKey()); - inputEntry.getValue().start(); - processorContext.waitForAnyInputReady(Collections.singletonList((Input) (inputEntry - .getValue()))); - } else { - LOG.info("Input: " + inputEntry.getKey() - + " is already cached. 
Skipping start and wait for ready"); - } - } MRTaskReporter mrReporter = new MRTaskReporter(getContext()); rproc.init(jobConf, getContext(), mrReporter, inputs, outputs); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java index d05cc23..7aa279a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java @@ -18,7 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector; -import java.io.IOException; +import java.util.Collection; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; @@ -27,15 +28,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeStats; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.ObjectWritable; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; /** @@ -59,18 +54,19 @@ public VectorAppMasterEventOperator() { } @Override - public void initializeOp(Configuration hconf) throws HiveException { - super.initializeOp(hconf); + public Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); valueWriters = VectorExpressionWriterFactory.getExpressionWriters( (StructObjectInspector) inputObjInspectors[0]); singleRow = new Object[valueWriters.length]; + return result; } @Override - public void processOp(Object data, int tag) throws HiveException { - + public void process(Object data, int tag) throws HiveException { + VectorizedRowBatch vrg = (VectorizedRowBatch) data; - + Writable [] records = null; Writable recordValue = null; boolean vectorizedSerde = false; @@ -85,7 +81,7 @@ public void processOp(Object data, int tag) throws HiveException { } catch (SerDeException e1) { throw new HiveException(e1); } - + for (int i = 0; i < vrg.size; i++) { Writable row = null; if (vectorizedSerde) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java index 858604c..bfa8134 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Collection; +import java.util.concurrent.Future; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -50,7 +53,7 @@ public VectorFileSinkOperator() { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { // We need a input 
object inspector that is for the row we will extract out of the // vectorized row batch, not for example, an original inspector for an ORC table, etc. VectorExpressionWriterFactory.processVectorInspector( @@ -66,15 +69,15 @@ public void assign(VectorExpressionWriter[] writers, singleRow = new Object[valueWriters.length]; // Call FileSinkOperator with new input inspector. - super.initializeOp(hconf); + return super.initializeOp(hconf); } @Override - public void processOp(Object data, int tag) throws HiveException { + public void process(Object data, int tag) throws HiveException { VectorizedRowBatch vrg = (VectorizedRowBatch)data; for (int i = 0; i < vrg.size; i++) { Object[] row = getRowObject(vrg, i); - super.processOp(row, tag); + super.process(row, tag); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java index e464b96..d1b8939 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Collection; +import java.util.concurrent.Future; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.FilterOperator; @@ -27,7 +30,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.api.OperatorType; /** * Filter operator implementation. @@ -39,7 +41,7 @@ private VectorExpression conditionEvaluator = null; // Temporary selected vector - private int[] temporarySelected = new int [VectorizedRowBatch.DEFAULT_SIZE]; + private final int[] temporarySelected = new int [VectorizedRowBatch.DEFAULT_SIZE]; // filterMode is 1 if condition is always true, -1 if always false // and 0 if condition needs to be computed. 
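// Note on the hunk below (and on the other operator hunks in this patch): initializeOp()
// now returns the Collection<Future<?>> obtained from super.initializeOp() instead of
// returning void, and the explicit initializeChildren(hconf) call is dropped because child
// initialization is handled by the framework after the parent's futures are returned.
// A minimal sketch of a migrated operator subclass follows; the class name MyVectorOperator
// and its choice of OperatorType are hypothetical illustrations, not part of this patch.
import java.util.Collection;
import java.util.concurrent.Future;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;

public class MyVectorOperator extends Operator<OperatorDesc> {
  private static final long serialVersionUID = 1L;

  @Override
  protected Collection<Future<?>> initializeOp(Configuration hconf) throws HiveException {
    // Chain to the parent first and keep the futures it hands back.
    Collection<Future<?>> result = super.initializeOp(hconf);
    // ... operator-specific setup would go here ...
    // No initializeChildren(hconf) call any more; just return the parent's futures.
    return result;
  }

  @Override
  public void process(Object row, int tag) throws HiveException {
    // processOp(Object, int) is renamed to process(Object, int) throughout the patch.
    forward(row, inputObjInspectors[tag]);
  }

  @Override
  public OperatorType getType() {
    return OperatorType.FORWARD; // placeholder type for this hypothetical operator
  }
}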
@@ -59,7 +61,8 @@ public VectorFilterOperator() { @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); try { heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT); @@ -74,7 +77,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { filterMode = -1; } } - initializeChildren(hconf); + return result; } public void setFilterCondition(VectorExpression expr) { @@ -82,7 +85,7 @@ public void setFilterCondition(VectorExpression expr) { } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { VectorizedRowBatch vrg = (VectorizedRowBatch) row; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 402b324..4ca82e4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -22,18 +22,19 @@ import java.lang.management.MemoryMXBean; import java.lang.ref.SoftReference; import java.util.ArrayList; -import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.KeyWrapper; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; @@ -43,6 +44,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -54,7 +56,8 @@ * stores the aggregate operators' intermediate states. Emits row mode output. * */ -public class VectorGroupByOperator extends GroupByOperator implements VectorizationContextRegion { +public class VectorGroupByOperator extends Operator implements + VectorizationContextRegion { private static final Log LOG = LogFactory.getLog( VectorGroupByOperator.class.getName()); @@ -100,7 +103,15 @@ private transient VectorizedRowBatchCtx vrbCtx; private transient VectorColumnAssign[] vectorColumnAssign; - + + private transient int numEntriesHashTable; + + private transient long maxHashTblMemory; + + private transient long maxMemory; + + private float memoryThreshold; + /** * Interface for processing mode: global, hash, unsorted streaming, or group batch */ @@ -118,9 +129,11 @@ private abstract class ProcessingModeBase implements IProcessingMode { // Overridden and used in sorted reduce group batch processing mode. + @Override public void startGroup() throws HiveException { // Do nothing. 
} + @Override public void endGroup() throws HiveException { // Do nothing. } @@ -177,7 +190,7 @@ protected VectorAggregationBufferRow allocateAggregationBuffer() throws HiveExce private class ProcessingModeGlobalAggregate extends ProcessingModeBase { /** - * In global processing mode there is only one set of aggregation buffers + * In global processing mode there is only one set of aggregation buffers */ private VectorAggregationBufferRow aggregationBuffers; @@ -233,7 +246,7 @@ public void close(boolean aborted) throws HiveException { private long sumBatchSize; /** - * Max number of entries in the vector group by aggregation hashtables. + * Max number of entries in the vector group by aggregation hashtables. * Exceeding this will trigger a flush irrelevant of memory pressure condition. */ private int maxHtEntries = 1000000; @@ -247,12 +260,12 @@ public void close(boolean aborted) throws HiveException { * Percent of entries to flush when memory threshold exceeded. */ private float percentEntriesToFlush = 0.1f; - + /** * A soft reference used to detect memory pressure */ private SoftReference gcCanary = new SoftReference(new Object()); - + /** * Counts the number of time the gcCanary died and was resurrected */ @@ -289,7 +302,7 @@ public void initialize(Configuration hconf) throws HiveException { HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION); this.numRowsCompareHashAggr = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL); - } + } else { this.percentEntriesToFlush = HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT.defaultFloatVal; @@ -322,14 +335,14 @@ public void processBatch(VectorizedRowBatch batch) throws HiveException { processAggregators(batch); //Flush if memory limits were reached - // We keep flushing until the memory is under threshold + // We keep flushing until the memory is under threshold int preFlushEntriesCount = numEntriesHashTable; while (shouldFlush(batch)) { flush(false); if(gcCanary.get() == null) { gcCanaryFlushes++; - gcCanary = new SoftReference(new Object()); + gcCanary = new SoftReference(new Object()); } //Validate that some progress is being made @@ -468,7 +481,7 @@ private void flush(boolean all) throws HiveException { mapKeysAggregationBuffers.clear(); numEntriesHashTable = 0; } - + if (all && LOG.isDebugEnabled()) { LOG.debug(String.format("GC canary caused %d flushes", gcCanaryFlushes)); } @@ -495,7 +508,7 @@ private boolean shouldFlush(VectorizedRowBatch batch) { if (gcCanary.get() == null) { return true; } - + return false; } @@ -515,14 +528,14 @@ private void updateAvgVariableSize(VectorizedRowBatch batch) { } /** - * Checks if the HT reduces the number of entries by at least minReductionHashAggr factor + * Checks if the HT reduces the number of entries by at least minReductionHashAggr factor * @throws HiveException */ private void checkHashModeEfficiency() throws HiveException { if (lastModeCheckRowCount > numRowsCompareHashAggr) { lastModeCheckRowCount = 0; if (LOG.isDebugEnabled()) { - LOG.debug(String.format("checkHashModeEfficiency: HT:%d RC:%d MIN:%d", + LOG.debug(String.format("checkHashModeEfficiency: HT:%d RC:%d MIN:%d", numEntriesHashTable, sumBatchSize, (long)(sumBatchSize * minReductionHashAggr))); } if (numEntriesHashTable > sumBatchSize * minReductionHashAggr) { @@ -541,7 +554,7 @@ private void checkHashModeEfficiency() throws HiveException { */ private class ProcessingModeUnsortedStreaming extends ProcessingModeBase { - /** + /** * The aggregation buffers used in streaming mode */ private VectorAggregationBufferRow 
currentStreamingAggregators; @@ -554,19 +567,19 @@ private void checkHashModeEfficiency() throws HiveException { /** * The keys that needs to be flushed at the end of the current batch */ - private final VectorHashKeyWrapper[] keysToFlush = + private final VectorHashKeyWrapper[] keysToFlush = new VectorHashKeyWrapper[VectorizedRowBatch.DEFAULT_SIZE]; /** * The aggregates that needs to be flushed at the end of the current batch */ - private final VectorAggregationBufferRow[] rowsToFlush = + private final VectorAggregationBufferRow[] rowsToFlush = new VectorAggregationBufferRow[VectorizedRowBatch.DEFAULT_SIZE]; /** * A pool of VectorAggregationBufferRow to avoid repeated allocations */ - private VectorUtilBatchObjectPool + private VectorUtilBatchObjectPool streamAggregationBufferRowPool; @Override @@ -658,7 +671,7 @@ public void close(boolean aborted) throws HiveException { * vectorized reduce-shuffle feeds the batches to us. * * 2) Later at endGroup after reduce-shuffle has fed us all the input batches for the group, - * we fill in the aggregation columns in outputBatch at outputBatch.size. Our method + * we fill in the aggregation columns in outputBatch at outputBatch.size. Our method * writeGroupRow does this and finally increments outputBatch.size. * */ @@ -672,7 +685,7 @@ public void close(boolean aborted) throws HiveException { */ VectorGroupKeyHelper groupKeyHelper; - /** + /** * The group vector aggregation buffers. */ private VectorAggregationBufferRow groupAggregators; @@ -750,7 +763,7 @@ public VectorGroupByOperator(VectorizationContext vContext, OperatorDesc conf) AggregationDesc aggDesc = aggrDesc.get(i); aggregators[i] = vContext.getAggregatorExpression(aggDesc, desc.getVectorDesc().isReduce()); } - + isVectorOutput = desc.getVectorDesc().isVectorOutput(); vOutContext = new VectorizationContext(desc.getOutputColumnNames()); @@ -762,7 +775,8 @@ public VectorGroupByOperator() { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); List objectInspectors = new ArrayList(); @@ -773,9 +787,9 @@ protected void initializeOp(Configuration hconf) throws HiveException { // grouping id should be pruned, which is the last of key columns // see ColumnPrunerGroupByProc - outputKeyLength = + outputKeyLength = conf.pruneGroupingSetId() ? 
keyExpressions.length - 1 : keyExpressions.length; - + keyOutputWriters = new VectorExpressionWriter[outputKeyLength]; for(int i = 0; i < outputKeyLength; ++i) { @@ -812,8 +826,6 @@ protected void initializeOp(Configuration hconf) throws HiveException { throw new HiveException(e); } - initializeChildren(hconf); - forwardCache = new Object[outputKeyLength + aggregators.length]; if (outputKeyLength == 0) { @@ -826,13 +838,14 @@ protected void initializeOp(Configuration hconf) throws HiveException { processingMode = this.new ProcessingModeHashAggregate(); } processingMode.initialize(hconf); + return result; } /** * changes the processing mode to unsorted streaming - * This is done at the request of the hash agg mode, if the number of keys + * This is done at the request of the hash agg mode, if the number of keys * exceeds the minReductionHashAggr factor - * @throws HiveException + * @throws HiveException */ private void changeToUnsortedStreamingMode() throws HiveException { processingMode = this.new ProcessingModeUnsortedStreaming(); @@ -859,7 +872,7 @@ public void endGroup() throws HiveException { } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) row; if (batch.size > 0) { @@ -962,4 +975,9 @@ public void setAggregators(VectorAggregateExpression[] aggregators) { public VectorizationContext getOuputVectorizationContext() { return vOutContext; } + + @Override + public OperatorType getType() { + return OperatorType.GROUPBY; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java index 4e47f35..2f4e46b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java @@ -39,7 +39,7 @@ public VectorLimitOperator(VectorizationContext vContext, OperatorDesc conf) { } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) row; if (currCount < limit) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index 2c8aee1..2d1e29d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -19,9 +19,11 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -56,7 +58,7 @@ private VectorExpression[] bigTableFilterExpressions; private VectorExpression[] bigTableValueExpressions; - + private VectorizationContext vOutContext; // The above members are initialized by the constructor and must not be @@ -76,7 +78,7 @@ private transient VectorExpressionWriter[] keyOutputWriters; private transient VectorizedRowBatchCtx vrbCtx = null; - + public VectorMapJoinOperator() { super(); } @@ -112,9 +114,9 @@ public VectorMapJoinOperator (VectorizationContext vContext, OperatorDesc conf) } @Override - public void initializeOp(Configuration hconf) throws 
HiveException { - super.initializeOp(hconf); - + public Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); + List keyDesc = conf.getKeys().get(posBigTable); keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc); @@ -178,6 +180,7 @@ protected Object _evaluate(Object row, int version) throws HiveException { filterMaps[posBigTable] = null; outputVectorAssigners = new HashMap(); + return result; } /** @@ -220,7 +223,7 @@ protected void setMapJoinKey(ReusableGetAdaptor dest, Object row, byte alias) } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { byte alias = (byte) tag; VectorizedRowBatch inBatch = (VectorizedRowBatch) row; @@ -246,7 +249,7 @@ public void processOp(Object row, int tag) throws HiveException { // of row-mode small-tables) this is a reasonable trade-off. // for(batchIndex=0; batchIndex < inBatch.size; ++batchIndex) { - super.processOp(row, tag); + super.process(row, tag); } // Set these two to invalid values so any attempt to use them diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java index 0ae0186..4426bfd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Collection; +import java.util.concurrent.Future; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -34,7 +37,7 @@ // Writer for producing row from input batch. private VectorExpressionWriter[] rowWriters; - + protected transient Object[] singleRow; public VectorReduceSinkOperator(VectorizationContext vContext, OperatorDesc conf) @@ -49,7 +52,7 @@ public VectorReduceSinkOperator() { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { // We need a input object inspector that is for the row we will extract out of the // vectorized row batch, not for example, an original inspector for an ORC table, etc. VectorExpressionWriterFactory.processVectorInspector( @@ -64,17 +67,16 @@ public void assign(VectorExpressionWriter[] writers, }); singleRow = new Object[rowWriters.length]; - // Call ReduceSinkOperator with new input inspector. 
- super.initializeOp(hconf); + return super.initializeOp(hconf); } @Override - public void processOp(Object data, int tag) throws HiveException { + public void process(Object data, int tag) throws HiveException { VectorizedRowBatch vrg = (VectorizedRowBatch) data; for (int batchIndex = 0 ; batchIndex < vrg.size; ++batchIndex) { Object row = getRowObject(vrg, batchIndex); - super.processOp(row, tag); + super.process(row, tag); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index ecd92d5..ebb6840 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -19,9 +19,11 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -47,8 +49,8 @@ public class VectorSMBMapJoinOperator extends SMBMapJoinOperator implements VectorizationContextRegion { private static final Log LOG = LogFactory.getLog( - VectorSMBMapJoinOperator.class.getName()); - + VectorSMBMapJoinOperator.class.getName()); + private static final long serialVersionUID = 1L; private VectorExpression[] bigTableValueExpressions; @@ -65,7 +67,7 @@ // transient. //--------------------------------------------------------------------------- - private transient VectorizedRowBatch outputBatch; + private transient VectorizedRowBatch outputBatch; private transient VectorizedRowBatchCtx vrbCtx = null; @@ -78,23 +80,23 @@ private transient VectorHashKeyWrapper[] keyValues; private transient SMBJoinKeyEvaluator keyEvaluator; - + private transient VectorExpressionWriter[] valueWriters; - + private interface SMBJoinKeyEvaluator { List evaluate(VectorHashKeyWrapper kw) throws HiveException; -} +} public VectorSMBMapJoinOperator() { super(); } - + public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf) throws HiveException { this(); SMBJoinDesc desc = (SMBJoinDesc) conf; this.conf = desc; - + order = desc.getTagOrder(); numAliases = desc.getExprs().size(); posBigTable = (byte) desc.getPosBigTable(); @@ -118,7 +120,7 @@ public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf vOutContext = new VectorizationContext(desc.getOutputColumnNames()); vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias()); } - + @Override protected List smbJoinComputeKeys(Object row, byte alias) throws HiveException { if (alias == this.posBigTable) { @@ -127,21 +129,21 @@ public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf } else { return super.smbJoinComputeKeys(row, alias); } - } - + } + @Override - protected void initializeOp(Configuration hconf) throws HiveException { - super.initializeOp(hconf); + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); vrbCtx = new VectorizedRowBatchCtx(); vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector); - + outputBatch = vrbCtx.createVectorizedRowBatch(); - + keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions); - + outputVectorAssigners = new HashMap(); - + // This key evaluator 
translates from the vectorized VectorHashKeyWrapper format // into the row-mode MapJoinKey keyEvaluator = new SMBJoinKeyEvaluator() { @@ -163,14 +165,14 @@ public SMBJoinKeyEvaluator init() { return key; }; }.init(); - + Map> valueExpressions = conf.getExprs(); - List bigTableExpressions = valueExpressions.get(posBigTable); - + List bigTableExpressions = valueExpressions.get(posBigTable); + // We're hijacking the big table evaluators and replacing them with our own custom ones // which are going to return values from the input batch vector expressions List vectorNodeEvaluators = new ArrayList(bigTableExpressions.size()); - + VectorExpressionWriterFactory.processVectorExpressions( bigTableExpressions, new VectorExpressionWriterFactory.ListOIDClosure() { @@ -180,7 +182,7 @@ public void assign(VectorExpressionWriter[] writers, List oids) valueWriters = writers; joinValuesObjectInspectors[posBigTable] = oids; } - }); + }); for(int i=0; i implements + VectorizationContextRegion { private static final long serialVersionUID = 1L; @@ -62,7 +65,7 @@ public VectorSelectOperator(VectorizationContext vContext, OperatorDesc conf) } /** - * Create a new vectorization context to create a new projection, but keep + * Create a new vectorization context to create a new projection, but keep * same output column manager must be inherited to track the scratch the columns. */ vOutContext = new VectorizationContext(vContext); @@ -74,7 +77,7 @@ public VectorSelectOperator(VectorizationContext vContext, OperatorDesc conf) for (int i=0; i < colList.size(); ++i) { String columnName = this.conf.getOutputColumnNames().get(i); VectorExpression ve = vExpressions[i]; - vOutContext.addProjectionColumn(columnName, + vOutContext.addProjectionColumn(columnName, ve.getOutputColumn()); } } @@ -83,11 +86,11 @@ public VectorSelectOperator() { } @Override - protected void initializeOp(Configuration hconf) throws HiveException { + protected Collection> initializeOp(Configuration hconf) throws HiveException { + Collection> result = super.initializeOp(hconf); // Just forward the row as is if (conf.isSelStarNoCompute()) { - initializeChildren(hconf); - return; + return null; } List objectInspectors = new ArrayList(); @@ -102,15 +105,15 @@ protected void initializeOp(Configuration hconf) throws HiveException { outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector( outputFieldNames, objectInspectors); - initializeChildren(hconf); projectedColumns = new int [vExpressions.length]; for (int i = 0; i < projectedColumns.length; i++) { projectedColumns[i] = vExpressions[i].getOutputColumn(); } + return result; } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { // Just forward the row as is if (conf.isSelStarNoCompute()) { @@ -167,4 +170,9 @@ public void setVExpressions(VectorExpression[] vExpressions) { public VectorizationContext getOuputVectorizationContext() { return vOutContext; } + + @Override + public OperatorType getType() { + return OperatorType.SELECT; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileMapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileMapper.java index c22fac5..4c5eed2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileMapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileMapper.java @@ -97,7 +97,7 @@ public void map(Object key, Object value, OutputCollector output, row[0] = key; row[1] = value; try { - 
mergeOp.processOp(row, 0); + mergeOp.process(row, 0); } catch (HiveException e) { abort = true; throw new IOException(e); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java index 9d2090d..a2db3b5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java @@ -54,9 +54,6 @@ protected boolean allParentsDispatched(Node nd) { @SuppressWarnings("unchecked") protected void addAllParents(Node nd) { Operator op = (Operator) nd; - if (op.getParentOperators() == null) { - return; - } getToWalk().removeAll(op.getParentOperators()); getToWalk().addAll(0, op.getParentOperators()); } @@ -68,6 +65,7 @@ protected void addAllParents(Node nd) { * current operator in the graph * @throws SemanticException */ + @Override public void walk(Node nd) throws SemanticException { if (opStack.empty() || nd != opStack.peek()) { opStack.push(nd); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index ce43120..085ef51 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -188,7 +188,7 @@ private Object checkAndConvertSMBJoin(OptimizeTezProcContext context, JoinOperat String selector = HiveConf.getVar(context.parseContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR); bigTableMatcherClass = - (Class) JavaUtils.loadClass(selector); + JavaUtils.loadClass(selector); } catch (ClassNotFoundException e) { throw new SemanticException(e.getMessage()); } @@ -273,16 +273,7 @@ private void convertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext cont } List> childOperators = mergeJoinOp.getChildOperators(); - if (childOperators == null) { - childOperators = new ArrayList>(); - mergeJoinOp.setChildOperators(childOperators); - } - List> parentOperators = mergeJoinOp.getParentOperators(); - if (parentOperators == null) { - parentOperators = new ArrayList>(); - mergeJoinOp.setParentOperators(parentOperators); - } childOperators.clear(); parentOperators.clear(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index 0514342..4d84f0f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -251,10 +251,8 @@ private static void checkChildOperatorType(Operator op) if (!op.opAllowedAfterMapJoin()) { throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_WITH_MAPJOIN.getMsg()); } - if (op.getChildOperators() != null) { - for (Operator childOp : op.getChildOperators()) { - checkChildOperatorType(childOp); - } + for (Operator childOp : op.getChildOperators()) { + checkChildOperatorType(childOp); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java index 62428db..86f360d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java @@ -107,7 +107,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx 
procCtx, listBucketCols.add(bucketCols); int numBuckets = -1; int numReduceSinks = 1; - OpTraits parentOpTraits = rs.getParentOperators().get(0).getConf().getOpTraits(); + OpTraits parentOpTraits = rs.getParentOperators().get(0).getConf().getTraits(); if (parentOpTraits != null) { numBuckets = parentOpTraits.getNumBuckets(); numReduceSinks += parentOpTraits.getNumReduceSinks(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java index f500a5e..1f6b5d7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java @@ -554,10 +554,6 @@ private boolean checkOperatorOKMapJoinConversion(Operator childOp : op.getChildOperators()) { if (!checkOperatorOKMapJoinConversion(childOp)) { return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java index ed30e76..fb20080 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java @@ -183,7 +183,7 @@ private void setStatistics(BaseWork origin, BaseWork clone) { private void setStatistics(Operator origin, Operator clone) { clone.getConf().setStatistics(origin.getConf().getStatistics()); - clone.getConf().setOpTraits(origin.getConf().getOpTraits()); + clone.getConf().setTraits(origin.getConf().getTraits()); if (origin.getChildOperators().size() == clone.getChildOperators().size()) { for (int i = 0; i < clone.getChildOperators().size(); i++) { setStatistics(origin.getChildOperators().get(i), clone.getChildOperators().get(i)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java index 1d75160..eeccc4b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java @@ -152,10 +152,6 @@ private void breakOperatorTree(Operator topOp) { topOp.setChildOperators(null); } - if (topOp.getChildOperators() == null) { - return; - } - for (Operator op : topOp.getChildOperators()) { breakOperatorTree(op); } @@ -194,6 +190,7 @@ protected void decideExecMode(List> rootTasks, Cont final Context lCtx = ctx; PathFilter p = new PathFilter() { + @Override public boolean accept(Path file) { return !lCtx.isMRTmpFileURI(file.toUri().getPath()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java index 52280cc..476dfd1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java @@ -54,11 +54,11 @@ public void setVectorMode(boolean vm) { this.vectorMode = vm; } - public OpTraits getOpTraits() { + public OpTraits getTraits() { return opTraits; } - public void setOpTraits(OpTraits opTraits) { + public void setTraits(OpTraits opTraits) { this.opTraits = opTraits; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java index c39a8d7..fb4d3b4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java 
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java @@ -25,7 +25,7 @@ public Object clone() throws CloneNotSupportedException; public Statistics getStatistics(); public void setStatistics(Statistics statistics); - public OpTraits getOpTraits(); - public void setOpTraits(OpTraits opTraits); + public OpTraits getTraits(); + public void setTraits(OpTraits opTraits); public Map getOpProps(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java index 9bfb517..ea1f713 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java @@ -108,8 +108,6 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { RowSchema parentRS, ExprNodeDesc filterExpr) { Operator filter = OperatorFactory.get(new FilterDesc(filterExpr, false), new RowSchema(parentRS.getSignature())); - filter.setParentOperators(new ArrayList>()); - filter.setChildOperators(new ArrayList>()); filter.getParentOperators().add(parent); filter.getChildOperators().add(target); parent.replaceChild(target, filter); @@ -224,7 +222,7 @@ private boolean filterExists(ReduceSinkOperator target, ExprNodeDesc replaced) { private static class Vectors { - private Set[] vector; + private final Set[] vector; @SuppressWarnings("unchecked") public Vectors(int length) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java index 5ccf0a2..363e49e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java @@ -97,8 +97,6 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { RowSchema parentRS, ExprNodeDesc filterExpr) { Operator filter = OperatorFactory.get(new FilterDesc(filterExpr, false), new RowSchema(parentRS.getSignature())); - filter.setParentOperators(new ArrayList>()); - filter.setChildOperators(new ArrayList>()); filter.getParentOperators().add(parent); filter.getChildOperators().add(target); parent.replaceChild(target, filter); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java index 627b244..e400778 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java @@ -319,7 +319,7 @@ private FileSinkOperator getFileSink(AcidUtils.Operation writeType, } private void processRows(FileSinkOperator op) throws HiveException { - for (TFSORow r : rows) op.processOp(r, 0); + for (TFSORow r : rows) op.process(r, 0); op.jobCloseOp(jc, true); op.close(false); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java index 0049f53..62057d8 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java @@ -236,7 +236,7 @@ public void testScriptOperator() throws Throwable { // evaluate on row for (int i = 0; i < 5; i++) { - op.processOp(r[i].o, 0); + op.process(r[i].o, 0); } op.close(false); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java index ac3cb81..8ccd5f2 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java @@ -19,12 +19,11 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; import junit.framework.Assert; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar; @@ -39,6 +38,8 @@ */ public class TestVectorFilterOperator { + HiveConf hconf = new HiveConf(); + /** * Fundamental logic and performance tests for vector filters belong here. * @@ -96,6 +97,7 @@ private VectorFilterOperator getAVectorFilterOperator() throws HiveException { @Test public void testBasicFilterOperator() throws HiveException { VectorFilterOperator vfo = getAVectorFilterOperator(); + vfo.initialize(hconf, null); VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1); VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0); VectorExpression ve3 = new FilterExprAndExpr(); @@ -124,6 +126,7 @@ public void testBasicFilterOperator() throws HiveException { @Test public void testBasicFilterLargeData() throws HiveException { VectorFilterOperator vfo = getAVectorFilterOperator(); + vfo.initialize(hconf, null); VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1); VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0); VectorExpression ve3 = new FilterExprAndExpr(); @@ -136,7 +139,7 @@ public void testBasicFilterLargeData() throws HiveException { VectorizedRowBatch vrg = fdr.getNext(); while (vrg.size > 0) { - vfo.processOp(vrg, 0); + vfo.process(vrg, 0); vrg = fdr.getNext(); } long endTime = System.currentTimeMillis(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index fbb7ff2..f5ec7a7 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -26,8 +26,6 @@ import java.lang.management.ManagementFactory; import java.lang.management.MemoryMXBean; import java.lang.reflect.Constructor; -import java.math.BigDecimal; -import java.math.BigInteger; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; @@ -39,6 +37,7 @@ import java.util.Set; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromConcat; import org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromLongIterables; @@ -70,6 +69,8 @@ */ public class TestVectorGroupByOperator { + HiveConf hconf = new HiveConf(); + private static ExprNodeDesc buildColumnDesc( VectorizationContext ctx, String column, @@ -188,7 +189,7 @@ public void testMemoryPressureFlush() throws HiveException { VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); - vgo.initialize(null, null); + 
vgo.initialize(hconf, null); this.outputRowCount = 0; out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() { @@ -233,7 +234,7 @@ public void remove() { long countRowsProduced = 0; for (VectorizedRowBatch unit: data) { countRowsProduced += 100; - vgo.processOp(unit, 0); + vgo.process(unit, 0); if (0 < outputRowCount) { break; } @@ -529,7 +530,7 @@ public void testIntKeyTypeAggregate() throws HiveException { new String[] {"int", "bigint"}, Arrays.asList(new Object[]{ 1,null, 1, null}), Arrays.asList(new Object[]{13L,null,7L, 19L})), - buildHashMap((int)1, 20L, null, 19L)); + buildHashMap(1, 20L, null, 19L)); } @Test @@ -541,7 +542,7 @@ public void testBigintKeyTypeAggregate() throws HiveException { new String[] {"bigint", "bigint"}, Arrays.asList(new Object[]{ 1,null, 1, null}), Arrays.asList(new Object[]{13L,null,7L, 19L})), - buildHashMap((long)1L, 20L, null, 19L)); + buildHashMap(1L, 20L, null, 19L)); } @Test @@ -589,7 +590,7 @@ public void testDoubleKeyTypeAggregate() throws HiveException { new String[] {"double", "bigint"}, Arrays.asList(new Object[]{ 1,null, 1, null}), Arrays.asList(new Object[]{13L,null,7L, 19L})), - buildHashMap((double)1.0, 20L, null, 19L)); + buildHashMap(1.0, 20L, null, 19L)); } @Test @@ -794,7 +795,7 @@ public void testStdPopDecimal () throws HiveException { HiveDecimal.create(5), HiveDecimal.create(7), HiveDecimal.create(19)}), - (double) Math.sqrt(30)); + Math.sqrt(30)); } @Test @@ -808,7 +809,7 @@ public void testStdSampDecimal () throws HiveException { HiveDecimal.create(5), HiveDecimal.create(7), HiveDecimal.create(19)}), - (double) Math.sqrt(40)); + Math.sqrt(40)); } @Test @@ -1546,7 +1547,7 @@ public void testVarianceLongSingle () throws HiveException { "variance", 2, Arrays.asList(new Long[]{97L}), - (double)0.0); + 0.0); } @Test @@ -1565,12 +1566,12 @@ public void testVarianceLongNulls () throws HiveException { "variance", 2, Arrays.asList(new Long[]{null,13L, 5L,7L,19L}), - (double) 30.0); + 30.0); testAggregateLongAggregate( "variance", 2, Arrays.asList(new Long[]{13L,null,5L, 7L,19L}), - (double) 30.0); + 30.0); testAggregateLongAggregate( "variance", 2, @@ -1633,7 +1634,7 @@ public void testStdLongSimple () throws HiveException { "std", 2, Arrays.asList(new Long[]{13L,5L,7L,19L}), - (double) Math.sqrt(30)); + Math.sqrt(30)); } @Test @@ -1673,7 +1674,7 @@ public void testStdDevSampSimple () throws HiveException { "stddev_samp", 2, Arrays.asList(new Long[]{13L,5L,7L,19L}), - (double) Math.sqrt(40)); + Math.sqrt(40)); } @Test @@ -1731,7 +1732,7 @@ private void testMultiKey( VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); - vgo.initialize(null, null); + vgo.initialize(hconf, null); out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() { private int rowIndex; @@ -1801,7 +1802,7 @@ public void inspectRow(Object row, int tag) throws HiveException { }.init(aggregateName, expected, keys)); for (VectorizedRowBatch unit: data) { - vgo.processOp(unit, 0); + vgo.process(unit, 0); } vgo.close(false); @@ -1845,7 +1846,7 @@ private void testKeyTypeAggregate( VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); - vgo.initialize(null, null); + vgo.initialize(hconf, null); out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() { private int rowIndex; @@ -1914,7 +1915,7 @@ public void inspectRow(Object row, int tag) 
throws HiveException { }.init(aggregateName, expected, keys)); for (VectorizedRowBatch unit: data) { - vgo.processOp(unit, 0); + vgo.process(unit, 0); } vgo.close(false); @@ -2079,18 +2080,18 @@ public void validate(String key, Object expected, Object result) { assertEquals(key, null, arr[0]); } else if (arr[0] instanceof LongWritable) { LongWritable lw = (LongWritable) arr[0]; - assertEquals(key, (Long) expected, (Long) lw.get()); + assertEquals(key, expected, lw.get()); } else if (arr[0] instanceof Text) { Text tx = (Text) arr[0]; String sbw = tx.toString(); - assertEquals(key, (String) expected, sbw); + assertEquals(key, expected, sbw); } else if (arr[0] instanceof DoubleWritable) { DoubleWritable dw = (DoubleWritable) arr[0]; - assertEquals (key, (Double) expected, (Double) dw.get()); + assertEquals (key, expected, dw.get()); } else if (arr[0] instanceof Double) { - assertEquals (key, (Double) expected, (Double) arr[0]); + assertEquals (key, expected, arr[0]); } else if (arr[0] instanceof Long) { - assertEquals (key, (Long) expected, (Long) arr[0]); + assertEquals (key, expected, arr[0]); } else if (arr[0] instanceof HiveDecimalWritable) { HiveDecimalWritable hdw = (HiveDecimalWritable) arr[0]; HiveDecimal hd = hdw.getHiveDecimal(); @@ -2126,10 +2127,10 @@ public void validate(String key, Object expected, Object result) { if (vals[1] instanceof DoubleWritable) { DoubleWritable dw = (DoubleWritable) vals[1]; - assertEquals (key, (Double) expected, (Double) (dw.get() / lw.get())); + assertEquals (key, expected, dw.get() / lw.get()); } else if (vals[1] instanceof HiveDecimalWritable) { HiveDecimalWritable hdw = (HiveDecimalWritable) vals[1]; - assertEquals (key, (HiveDecimal) expected, hdw.getHiveDecimal().divide(HiveDecimal.create(lw.get()))); + assertEquals (key, expected, hdw.getHiveDecimal().divide(HiveDecimal.create(lw.get()))); } } } @@ -2241,10 +2242,10 @@ public void testAggregateCountStarIterable ( VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); - vgo.initialize(null, null); + vgo.initialize(hconf, null); for (VectorizedRowBatch unit: data) { - vgo.processOp(unit, 0); + vgo.process(unit, 0); } vgo.close(false); @@ -2272,10 +2273,10 @@ public void testAggregateCountReduceIterable ( VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); - vgo.initialize(null, null); + vgo.initialize(hconf, null); for (VectorizedRowBatch unit: data) { - vgo.processOp(unit, 0); + vgo.process(unit, 0); } vgo.close(false); @@ -2303,10 +2304,10 @@ public void testAggregateStringIterable ( VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); - vgo.initialize(null, null); + vgo.initialize(hconf, null); for (VectorizedRowBatch unit: data) { - vgo.processOp(unit, 0); + vgo.process(unit, 0); } vgo.close(false); @@ -2321,35 +2322,34 @@ public void testAggregateStringIterable ( } public void testAggregateDecimalIterable ( - String aggregateName, - Iterable data, - Object expected) throws HiveException { - List mapColumnNames = new ArrayList(); - mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); +String aggregateName, Iterable data, + Object expected) throws HiveException { + List mapColumnNames = new ArrayList(); + mapColumnNames.add("A"); + VectorizationContext ctx = 
new VectorizationContext(mapColumnNames); - GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", - TypeInfoFactory.getDecimalTypeInfo(30, 4)); + GroupByDesc desc = + buildGroupByDescType(ctx, aggregateName, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4)); - VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); + VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); - FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); - vgo.initialize(null, null); + FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); + vgo.initialize(hconf, null); - for (VectorizedRowBatch unit: data) { - vgo.processOp(unit, 0); - } - vgo.close(false); + for (VectorizedRowBatch unit : data) { + vgo.process(unit, 0); + } + vgo.close(false); - List outBatchList = out.getCapturedRows(); - assertNotNull(outBatchList); - assertEquals(1, outBatchList.size()); + List outBatchList = out.getCapturedRows(); + assertNotNull(outBatchList); + assertEquals(1, outBatchList.size()); - Object result = outBatchList.get(0); + Object result = outBatchList.get(0); - Validator validator = getValidator(aggregateName); - validator.validate("_total", expected, result); - } + Validator validator = getValidator(aggregateName); + validator.validate("_total", expected, result); + } public void testAggregateDoubleIterable ( @@ -2366,10 +2366,10 @@ public void testAggregateDoubleIterable ( VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); - vgo.initialize(null, null); + vgo.initialize(hconf, null); for (VectorizedRowBatch unit: data) { - vgo.processOp(unit, 0); + vgo.process(unit, 0); } vgo.close(false); @@ -2399,7 +2399,7 @@ public void testAggregateLongIterable ( vgo.initialize(null, null); for (VectorizedRowBatch unit: data) { - vgo.processOp(unit, 0); + vgo.process(unit, 0); } vgo.close(false); @@ -2430,7 +2430,7 @@ public void testAggregateLongKeyIterable ( VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); - vgo.initialize(null, null); + vgo.initialize(hconf, null); out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() { private int rowIndex; @@ -2469,7 +2469,7 @@ public void inspectRow(Object row, int tag) throws HiveException { }.init(aggregateName, expected, keys)); for (VectorizedRowBatch unit: data) { - vgo.processOp(unit, 0); + vgo.process(unit, 0); } vgo.close(false); @@ -2496,7 +2496,7 @@ public void testAggregateStringKeyIterable ( VectorGroupByOperator vgo = new VectorGroupByOperator(ctx, desc); FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(vgo); - vgo.initialize(null, null); + vgo.initialize(hconf, null); out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() { private int rowIndex; @@ -2536,7 +2536,7 @@ public void inspectRow(Object row, int tag) throws HiveException { }.init(aggregateName, expected, keys)); for (VectorizedRowBatch unit: data) { - vgo.processOp(unit, 0); + vgo.process(unit, 0); } vgo.close(false); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java index 604cfb5..aa1d89d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java +++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorLimitOperator.java @@ -67,7 +67,7 @@ private void validateVectorLimitOperator(int limit, int batchSize, int expectedB lo.initialize(new Configuration(), null); // Process the batch - lo.processOp(vrb, 0); + lo.process(vrb, 0); // Verify batch size Assert.assertEquals(vrb.size, expectedBatchSize); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java index 3c004a1..b482029 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java @@ -119,7 +119,7 @@ public void testSelectOperator() throws HiveException { VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch( VectorizedRowBatch.DEFAULT_SIZE, 4, 17); - vso.processOp(vrg, 0); + vso.process(vrg, 0); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeCaptureOutputOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeCaptureOutputOperator.java index 43458d9..93a6aed 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeCaptureOutputOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeCaptureOutputOperator.java @@ -20,8 +20,9 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.Arrays; +import java.util.Collection; import java.util.List; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.Operator; @@ -36,17 +37,17 @@ public class FakeCaptureOutputOperator extends Operator implements Serializable { private static final long serialVersionUID = 1L; - + public interface OutputInspector { public void inspectRow(Object row, int tag) throws HiveException; } - + private OutputInspector outputInspector; - + public void setOutputInspector(OutputInspector outputInspector) { this.outputInspector = outputInspector; } - + public OutputInspector getOutputInspector() { return outputInspector; } @@ -67,18 +68,20 @@ public static FakeCaptureOutputOperator addCaptureOutputChild( return out; } - + public List getCapturedRows() { return rows; } @Override - public void initializeOp(Configuration conf) throws HiveException { + public Collection> initializeOp(Configuration conf) throws HiveException { + Collection> result = super.initializeOp(conf); rows = new ArrayList(); + return result; } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { rows.add(row); if (null != outputInspector) { outputInspector.inspectRow(row, tag); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorDataSourceOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorDataSourceOperator.java index 22c2ce0..fe990f8 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorDataSourceOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorDataSourceOperator.java @@ -20,7 +20,9 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.Operator; @@ -60,11 +62,12 @@ public FakeVectorDataSourceOperator( } @Override - public void 
initializeOp(Configuration conf) throws HiveException { + public Collection> initializeOp(Configuration conf) throws HiveException { + return super.initializeOp(conf); } @Override - public void processOp(Object row, int tag) throws HiveException { + public void process(Object row, int tag) throws HiveException { for (VectorizedRowBatch unit: source) { forward(unit, null); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/testutil/OperatorTestUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/testutil/OperatorTestUtils.java index b3415eb..86c7027 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/testutil/OperatorTestUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/testutil/OperatorTestUtils.java @@ -67,7 +67,7 @@ public static void assertResults(Operator selectOp, CollectOperator InspectableObject [] sourceData, InspectableObject [] expected) throws HiveException { InspectableObject resultRef = new InspectableObject(); for (int i = 0; i < sourceData.length; i++) { - selectOp.processOp(sourceData[i].o, 0); + selectOp.process(sourceData[i].o, 0); collectOp.retrieve(resultRef); StructObjectInspector expectedOi = (StructObjectInspector) expected[i].oi; List expectedFields = expectedOi.getAllStructFieldRefs();
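// The test hunks above all follow the same calling sequence: tests now pass a real HiveConf
// to initialize() (e.g. vgo.initialize(hconf, null) instead of vgo.initialize(null, null)) so
// that super.initializeOp() can read configuration, and they feed batches through process()
// rather than the removed processOp(). A condensed sketch of that flow, reusing the helper and
// reader names from TestVectorFilterOperator above (getAVectorFilterOperator(), fdr) rather
// than introducing new ones:
HiveConf hconf = new HiveConf();
VectorFilterOperator vfo = getAVectorFilterOperator(); // builds the operator under test
vfo.initialize(hconf, null);                           // was: vfo.initialize(null, null)

VectorizedRowBatch vrg = fdr.getNext();                // fdr is the fake batch source in the test
while (vrg.size > 0) {
  vfo.process(vrg, 0);                                 // was: vfo.processOp(vrg, 0)
  vrg = fdr.getNext();
}
vfo.close(false);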