diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 95c5c0e..ae558a2 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2438,6 +2438,15 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
 "This flag should be set to true to enable the new vectorization\n" +
 "of queries using ReduceSink.\ni" +
 "The default value is true."),
+ HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT("hive.vectorized.use.vectorized.input.format", true,
+ "This flag should be set to true to enable vectorizing with vectorized input file format capable SerDe.\n" +
+ "The default value is true."),
+ HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE("hive.vectorized.use.vector.serde.deserialize", false,
+ "This flag should be set to true to enable vectorizing rows using vector deserialize.\n" +
+ "The default value is false."),
+ HIVE_VECTORIZATION_USE_ROW_DESERIALIZE("hive.vectorized.use.row.serde.deserialize", false,
+ "This flag should be set to true to enable vectorizing using row deserialize.\n" +
+ "The default value is false."),
 HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " +
 "whether to check, convert, and normalize partition value to conform to its column type in " +
 "partition operations including but not limited to insert, such as alter, describe etc."),
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index 9fb79a5..298f788 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -102,7 +102,7 @@
 sourceInputFormat.getRecordReader(split, job, reporter);
 return rr;
 }
- boolean isVectorMode = Utilities.isVectorMode(job);
+ boolean isVectorMode = Utilities.getUseVectorizedInputFileFormat(job);
 if (!isVectorMode) {
 LlapIoImpl.LOG.error("No LLAP IO in non-vectorized mode");
 throw new UnsupportedOperationException("No LLAP IO in non-vectorized mode");
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapOperator.java
new file mode 100644
index 0000000..3247c5d
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapOperator.java
@@ -0,0 +1,180 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.MapOperator.MapOpCtx;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Writable;
+
+
+/**
+ * Map operator. This triggers overall map side processing. This is a little
+ * different from regular operators in that it starts off by processing a
+ * Writable data structure from a Table (instead of a Hive Object).
+ **/
+@SuppressWarnings("deprecation")
+public abstract class AbstractMapOperator extends Operator implements Serializable, Cloneable {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Initialization call sequence:
+ *
+ * (Operator) setConf(MapWork conf);
+ * (Operator) initialize(Configuration hconf, ObjectInspector[] inputOIs);
+ *
+ * (AbstractMapOperator) setChildren(Configuration hconf)
+ *
+ * (Operator) passExecContext(ExecMapperContext execContext)
+ * (Operator) initializeLocalWork(Configuration hconf)
+ *
+ * (AbstractMapOperator) initializeMapOperator(Configuration hconf)
+ *
+ * [ (AbstractMapOperator) initializeContexts() ] // exec.tez.MapRecordProcessor only.
+ *
+ * (Operator) setReporter(Reporter rep)
+ *
+ */
+ /**
+ * Counter.
+ * + */ + public static enum Counter { + DESERIALIZE_ERRORS, + RECORDS_IN + } + + protected final transient LongWritable deserialize_error_count = new LongWritable(); + protected final transient LongWritable recordCounter = new LongWritable(); + protected transient long numRows = 0; + + private final Map connectedOperators + = new TreeMap(); + + private transient final Map normalizedPaths = new HashMap(); + + private Path normalizePath(String onefile, boolean schemaless) { + //creating Path is expensive, so cache the corresponding + //Path object in normalizedPaths + Path path = normalizedPaths.get(onefile); + if (path == null) { + path = new Path(onefile); + if (schemaless && path.toUri().getScheme() != null) { + path = new Path(path.toUri().getPath()); + } + normalizedPaths.put(onefile, path); + } + return path; + } + + protected String getNominalPath(Path fpath) { + String nominal = null; + boolean schemaless = fpath.toUri().getScheme() == null; + for (String onefile : conf.getPathToAliases().keySet()) { + Path onepath = normalizePath(onefile, schemaless); + Path curfpath = fpath; + if(!schemaless && onepath.toUri().getScheme() == null) { + curfpath = new Path(fpath.toUri().getPath()); + } + // check for the operators who will process rows coming to this Map Operator + if (onepath.toUri().relativize(curfpath.toUri()).equals(curfpath.toUri())) { + // not from this + continue; + } + if (nominal != null) { + throw new IllegalStateException("Ambiguous input path " + fpath); + } + nominal = onefile; + } + if (nominal == null) { + throw new IllegalStateException("Invalid input path " + fpath); + } + return nominal; + } + + public abstract void initEmptyInputChildren(List> children, Configuration hconf) + throws SerDeException, Exception; + + + /** Kryo ctor. 
*/ + protected AbstractMapOperator() { + super(); + } + + public AbstractMapOperator(CompilationOpContext ctx) { + super(ctx); + } + + public abstract void setChildren(Configuration hconf) throws Exception; + + + public void initializeMapOperator(Configuration hconf) throws HiveException { + // set that parent initialization is done and call initialize on children + state = State.INIT; + + statsMap.put(Counter.DESERIALIZE_ERRORS.toString(), deserialize_error_count); + + numRows = 0; + + String context = hconf.get(Operator.CONTEXT_NAME_KEY, ""); + if (context != null && !context.isEmpty()) { + context = "_" + context.replace(" ","_"); + } + statsMap.put(Counter.RECORDS_IN + context, recordCounter); + } + + public abstract void initializeContexts() throws HiveException; + + public abstract Deserializer getCurrentDeserializer(); + + public abstract void process(Writable value) throws HiveException; + + @Override + public void closeOp(boolean abort) throws HiveException { + recordCounter.set(numRows); + super.closeOp(abort); + } + + public void clearConnectedOperators() { + connectedOperators.clear(); + } + + public void setConnectedOperators(int tag, DummyStoreOperator dummyOp) { + connectedOperators.put(tag, dummyOp); + } + + public Map getConnectedOperators() { + return connectedOperators; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index 4608f70..d8e235d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -18,10 +18,8 @@ package org.apache.hadoop.hive.ql.exec; -import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; @@ -30,9 +28,6 @@ import java.util.Map.Entry; import java.util.Properties; import java.util.Set; -import java.util.TreeMap; -import java.util.concurrent.Future; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; @@ -42,7 +37,6 @@ import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.RecordIdentifier; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -59,14 +53,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.StringUtils; @@ -78,25 +70,11 @@ * Writable data structure from a Table (instead of a Hive Object). 
**/ @SuppressWarnings("deprecation") -public class MapOperator extends Operator implements Serializable, Cloneable { +public class MapOperator extends AbstractMapOperator { private static final long serialVersionUID = 1L; - /** - * Counter. - * - */ - public static enum Counter { - DESERIALIZE_ERRORS, - RECORDS_IN - } - - private final transient LongWritable deserialize_error_count = new LongWritable(); - private final transient LongWritable recordCounter = new LongWritable(); - protected transient long numRows = 0; protected transient long cntr = 1; - private final Map connectedOperators - = new TreeMap(); protected transient long logEveryNRows = 0; // input path --> {operator --> context} @@ -108,7 +86,6 @@ // context for current input file protected transient MapOpCtx[] currentCtxs; - private transient final Map normalizedPaths = new HashMap(); protected static class MapOpCtx { @@ -439,31 +416,6 @@ private void initOperatorContext(List> children } } - private String getNominalPath(Path fpath) { - String nominal = null; - boolean schemaless = fpath.toUri().getScheme() == null; - for (String onefile : conf.getPathToAliases().keySet()) { - Path onepath = normalizePath(onefile, schemaless); - Path curfpath = fpath; - if(!schemaless && onepath.toUri().getScheme() == null) { - curfpath = new Path(fpath.toUri().getPath()); - } - // check for the operators who will process rows coming to this Map Operator - if (onepath.toUri().relativize(curfpath.toUri()).equals(curfpath.toUri())) { - // not from this - continue; - } - if (nominal != null) { - throw new IllegalStateException("Ambiguous input path " + fpath); - } - nominal = onefile; - } - if (nominal == null) { - throw new IllegalStateException("Invalid input path " + fpath); - } - return nominal; - } - /** Kryo ctor. 
*/ protected MapOperator() { super(); @@ -479,32 +431,17 @@ public void initializeOp(Configuration hconf) throws HiveException { } public void initializeMapOperator(Configuration hconf) throws HiveException { - // set that parent initialization is done and call initialize on children - state = State.INIT; - statsMap.put(Counter.DESERIALIZE_ERRORS.toString(), deserialize_error_count); + super.initializeMapOperator(hconf); - numRows = 0; cntr = 1; logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS); - String context = hconf.get(Operator.CONTEXT_NAME_KEY, ""); - if (context != null && !context.isEmpty()) { - context = "_" + context.replace(" ","_"); - } - statsMap.put(Counter.RECORDS_IN + context, recordCounter); - for (Entry, StructObjectInspector> entry : childrenOpToOI.entrySet()) { Operator child = entry.getKey(); child.initialize(hconf, new ObjectInspector[] {entry.getValue()}); } } - @Override - public void closeOp(boolean abort) throws HiveException { - recordCounter.set(numRows); - super.closeOp(abort); - } - // Find context for current input file @Override public void cleanUpInputFileChangedOp() throws HiveException { @@ -534,20 +471,6 @@ public void cleanUpInputFileChangedOp() throws HiveException { currentCtxs = contexts.values().toArray(new MapOpCtx[contexts.size()]); } - private Path normalizePath(String onefile, boolean schemaless) { - //creating Path is expensive, so cache the corresponding - //Path object in normalizedPaths - Path path = normalizedPaths.get(onefile); - if (path == null) { - path = new Path(onefile); - if (schemaless && path.toUri().getScheme() != null) { - path = new Path(path.toUri().getPath()); - } - normalizedPaths.put(onefile, path); - } - return path; - } - public void process(Writable value) throws HiveException { // A mapper can span multiple files/partitions. // The serializers need to be reset if the input file changed @@ -704,16 +627,4 @@ public Deserializer getCurrentDeserializer() { return currentCtxs[0].deserializer; } - - public void clearConnectedOperators() { - connectedOperators.clear(); - } - - public void setConnectedOperators(int tag, DummyStoreOperator dummyOp) { - connectedOperators.put(tag, dummyOp); - } - - public Map getConnectedOperators() { - return connectedOperators; - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index ab0635e..e86f554 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -215,7 +215,7 @@ public static final String MAPRED_MAPPER_CLASS = "mapred.mapper.class"; public static final String MAPRED_REDUCER_CLASS = "mapred.reducer.class"; public static final String HIVE_ADDED_JARS = "hive.added.jars"; - public static final String VECTOR_MODE = "VECTOR_MODE"; + public static final String USE_VECTORIZED_INPUT_FILE_FORMAT = "USE_VECTORIZED_INPUT_FILE_FORMAT"; public static String MAPNAME = "Map "; public static String REDUCENAME = "Reducer "; @@ -3257,21 +3257,27 @@ private static void resetUmaskInConf(Configuration conf, boolean unsetUmask, Str * and vectorization is allowed. The plan may be configured for vectorization * but vectorization disallowed eg. for FetchOperator execution. 
*/ - public static boolean isVectorMode(Configuration conf) { - if (conf.get(VECTOR_MODE) != null) { + public static boolean getUseVectorizedInputFileFormat(Configuration conf) { + if (conf.get(USE_VECTORIZED_INPUT_FILE_FORMAT) != null) { // this code path is necessary, because with HS2 and client // side split generation we end up not finding the map work. // This is because of thread local madness (tez split // generation is multi-threaded - HS2 plan cache uses thread // locals). - return conf.getBoolean(VECTOR_MODE, false); + return conf.getBoolean(USE_VECTORIZED_INPUT_FILE_FORMAT, false); } else { return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) && Utilities.getPlanPath(conf) != null - && Utilities.getMapWork(conf).getVectorMode(); + && Utilities.getMapWork(conf).getUseVectorizedInputFileFormat(); } } + + public static boolean getUseVectorizedInputFileFormat(Configuration conf, MapWork mapWork) { + return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) + && mapWork.getUseVectorizedInputFileFormat(); + } + /** * @param conf * @return the configured VectorizedRowBatchCtx for a MapWork task. @@ -3288,11 +3294,6 @@ public static VectorizedRowBatchCtx getVectorizedRowBatchCtx(Configuration conf) return result; } - public static boolean isVectorMode(Configuration conf, MapWork mapWork) { - return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) - && mapWork.getVectorMode(); - } - public static void clearWorkMapForConf(Configuration conf) { // Remove cached query plans for the current query only Path mapPath = getPlanPath(conf, MAP_PLAN_NAME); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java index c34dd1f..f90a788 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java @@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.AbstractMapOperator; import org.apache.hadoop.hive.ql.exec.MapOperator; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.Operator; @@ -59,7 +60,7 @@ */ public class ExecMapper extends MapReduceBase implements Mapper { - private MapOperator mo; + private AbstractMapOperator mo; private OutputCollector oc; private JobConf jc; private boolean abort = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java index d8fe35f..48dfedc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java @@ -25,6 +25,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.AbstractMapOperator; import org.apache.hadoop.hive.ql.exec.MapOperator; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.Operator; @@ -55,7 +56,7 @@ */ public class SparkMapRecordHandler extends SparkRecordHandler { private static final Logger LOG = LoggerFactory.getLogger(SparkMapRecordHandler.class); - private MapOperator mo; + private AbstractMapOperator mo; private MapredLocalWork localWork = null; private boolean isLogInfoEnabled = false; 
private ExecMapperContext execContext; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 8aca779..147367e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -626,9 +626,9 @@ private Vertex createVertex(JobConf conf, MapWork mapWork, // generation we end up not finding the map work. This is // because of thread local madness (tez split generation is // multi-threaded - HS2 plan cache uses thread locals). Setting - // VECTOR_MODE causes the split gen code to use the conf instead + // USE_VECTORIZED_INPUT_FILE_FORMAT causes the split gen code to use the conf instead // of the map work. - conf.setBoolean(Utilities.VECTOR_MODE, mapWork.getVectorMode()); + conf.setBoolean(Utilities.USE_VECTORIZED_INPUT_FILE_FORMAT, mapWork.getUseVectorizedInputFileFormat()); dataSource = MRInputHelpers.configureMRInputWithLegacySplitGeneration(conf, new Path(tezDir, "split_" + mapWork.getName().replaceAll(" ", "_")), true); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java index 0584ad8..9a9f43a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java @@ -33,6 +33,7 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.AbstractMapOperator; import org.apache.hadoop.hive.llap.io.api.LlapProxy; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; @@ -75,8 +76,8 @@ public static final Logger l4j = LoggerFactory.getLogger(MapRecordProcessor.class); protected static final String MAP_PLAN_KEY = "__MAP_PLAN__"; - private MapOperator mapOp; - private final List mergeMapOpList = new ArrayList(); + private AbstractMapOperator mapOp; + private final List mergeMapOpList = new ArrayList(); private MapRecordSource[] sources; private final Map multiMRInputMap = new HashMap(); private int position; @@ -183,7 +184,7 @@ public Object call() { boolean fromCache = false; if (mergeWorkList != null) { - MapOperator mergeMapOp = null; + AbstractMapOperator mergeMapOp = null; for (BaseWork mergeWork : mergeWorkList) { MapWork mergeMapWork = (MapWork) mergeWork; if (mergeMapWork.getVectorMode()) { @@ -261,7 +262,7 @@ public Object call() { initializeMapRecordSources(); mapOp.initializeMapOperator(jconf); if ((mergeMapOpList != null) && mergeMapOpList.isEmpty() == false) { - for (MapOperator mergeMapOp : mergeMapOpList) { + for (AbstractMapOperator mergeMapOp : mergeMapOpList) { jconf.set(Utilities.INPUT_NAME, mergeMapOp.getConf().getName()); mergeMapOp.initializeMapOperator(jconf); } @@ -309,7 +310,7 @@ private void initializeMapRecordSources() throws Exception { reader = legacyMRInput.getReader(); } sources[position].init(jconf, mapOp, reader); - for (MapOperator mapOp : mergeMapOpList) { + for (AbstractMapOperator mapOp : mergeMapOpList) { int tag = mapOp.getConf().getTag(); sources[tag] = new MapRecordSource(); String inputName = mapOp.getConf().getName(); @@ -326,7 +327,7 @@ private void initializeMapRecordSources() throws Exception { @SuppressWarnings("deprecation") private KeyValueReader getKeyValueReader(Collection keyValueReaders, - MapOperator mapOp) + AbstractMapOperator mapOp) throws 
Exception { List kvReaderList = new ArrayList(keyValueReaders); // this sets up the map operator contexts correctly @@ -394,7 +395,7 @@ void close(){ } mapOp.close(abort); if (mergeMapOpList.isEmpty() == false) { - for (MapOperator mergeMapOp : mergeMapOpList) { + for (AbstractMapOperator mergeMapOp : mergeMapOpList) { mergeMapOp.close(abort); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java index b53c933..add7d08 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.ql.exec.MapOperator; +import org.apache.hadoop.hive.ql.exec.AbstractMapOperator; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.Writable; @@ -39,11 +39,11 @@ public static final Logger LOG = LoggerFactory.getLogger(MapRecordSource.class); private ExecMapperContext execContext = null; - private MapOperator mapOp = null; + private AbstractMapOperator mapOp = null; private KeyValueReader reader = null; private final boolean grouped = false; - void init(JobConf jconf, MapOperator mapOp, KeyValueReader reader) throws IOException { + void init(JobConf jconf, AbstractMapOperator mapOp, KeyValueReader reader) throws IOException { execContext = mapOp.getExecContext(); this.mapOp = mapOp; this.reader = reader; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java index c591288..22e0f35 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java @@ -49,7 +49,7 @@ private transient boolean firstBatch; - private transient VectorExtractRowDynBatch vectorExtractRowDynBatch; + private transient VectorExtractRow vectorExtractRow; protected transient Object[] singleRow; @@ -91,16 +91,14 @@ public void process(Object data, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) data; if (firstBatch) { - vectorExtractRowDynBatch = new VectorExtractRowDynBatch(); - vectorExtractRowDynBatch.init((StructObjectInspector) inputObjInspectors[0], vContext.getProjectedColumns()); + vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init((StructObjectInspector) inputObjInspectors[0], vContext.getProjectedColumns()); - singleRow = new Object[vectorExtractRowDynBatch.getCount()]; + singleRow = new Object[vectorExtractRow.getCount()]; firstBatch = false; } - vectorExtractRowDynBatch.setBatchOnEntry(batch); - ObjectInspector rowInspector = inputObjInspectors[0]; try { Writable writableRow; @@ -108,7 +106,7 @@ public void process(Object data, int tag) throws HiveException { int selected[] = batch.selected; for (int logical = 0 ; logical < batch.size; logical++) { int batchIndex = selected[logical]; - vectorExtractRowDynBatch.extractRow(batchIndex, singleRow); + vectorExtractRow.extractRow(batch, batchIndex, singleRow); writableRow = serializer.serialize(singleRow, rowInspector); writableRow.write(buffer); if (buffer.getLength() > MAX_SIZE) { @@ -120,7 +118,7 @@ public void process(Object data, int tag) throws HiveException { } } else { for (int batchIndex = 0 
; batchIndex < batch.size; batchIndex++) { - vectorExtractRowDynBatch.extractRow(batchIndex, singleRow); + vectorExtractRow.extractRow(batch, batchIndex, singleRow); writableRow = serializer.serialize(singleRow, rowInspector); writableRow.write(buffer); if (buffer.getLength() > MAX_SIZE) { @@ -136,7 +134,5 @@ public void process(Object data, int tag) throws HiveException { } forward(data, rowInspector); - - vectorExtractRowDynBatch.forgetBatchOnExit(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index de0300a..9d370c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -18,511 +18,126 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.ArrayList; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.*; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.io.ByteWritable; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; -import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.BooleanWritable; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hive.common.util.DateUtils; /** * This class assigns specified columns of a row from a Writable row Object[]. * * The caller provides the hive type names and target column numbers in the order desired to * assign from the Writable row Object[]. - * - * This class is abstract to allow the subclasses to control batch reuse. 
*/ -public abstract class VectorAssignRow { +public class VectorAssignRow { private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(VectorAssignRow.class); - protected abstract class Assigner { - protected int columnIndex; - - Assigner(int columnIndex) { - this.columnIndex = columnIndex; - } - - public int getColumnIndex() { - return columnIndex; - } - - abstract void setColumnVector(VectorizedRowBatch batch); - - abstract void forgetColumnVector(); - - abstract void assign(int batchIndex, Object object); - } - - private class VoidAssigner extends Assigner { - - VoidAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - } - - @Override - void forgetColumnVector() { - } - - @Override - void assign(int batchIndex, Object object) { - // This is no-op, there is no column to assign to and the object is expected to be null. - assert (object == null); - } - } - - private abstract class AbstractLongAssigner extends Assigner { - - protected LongColumnVector colVector; - protected long[] vector; - - AbstractLongAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (LongColumnVector) batch.cols[columnIndex]; - vector = colVector.vector; - } - - @Override - void forgetColumnVector() { - colVector = null; - vector = null; - } - } - - protected class BooleanAssigner extends AbstractLongAssigner { - - BooleanAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - BooleanWritable bw = (BooleanWritable) object; - vector[batchIndex] = (bw.get() ? 
1 : 0); - colVector.isNull[batchIndex] = false; - } - } - } - - protected class ByteAssigner extends AbstractLongAssigner { - - ByteAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - ByteWritable bw = (ByteWritable) object; - vector[batchIndex] = bw.get(); - colVector.isNull[batchIndex] = false; - } - } - } - - private class ShortAssigner extends AbstractLongAssigner { - - ShortAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - ShortWritable sw = (ShortWritable) object; - vector[batchIndex] = sw.get(); - colVector.isNull[batchIndex] = false; - } - } - } - - private class IntAssigner extends AbstractLongAssigner { - - IntAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - IntWritable iw = (IntWritable) object; - vector[batchIndex] = iw.get(); - colVector.isNull[batchIndex] = false; - } - } - } - - private class LongAssigner extends AbstractLongAssigner { - - LongAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - LongWritable lw = (LongWritable) object; - vector[batchIndex] = lw.get(); - colVector.isNull[batchIndex] = false; - } - } - } - - private class DateAssigner extends AbstractLongAssigner { - - DateAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - DateWritable bw = (DateWritable) object; - vector[batchIndex] = bw.getDays(); - colVector.isNull[batchIndex] = false; - } - } - } - - private abstract class AbstractTimestampAssigner extends Assigner { - - protected TimestampColumnVector colVector; - - AbstractTimestampAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (TimestampColumnVector) batch.cols[columnIndex]; - } - - @Override - void forgetColumnVector() { - colVector = null; - } - } - - private class TimestampAssigner extends AbstractTimestampAssigner { - - TimestampAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - colVector.set(batchIndex, ((TimestampWritable) object).getTimestamp()); - colVector.isNull[batchIndex] = false; - } - } - } - - private class IntervalYearMonthAssigner extends AbstractLongAssigner { - - IntervalYearMonthAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - HiveIntervalYearMonthWritable iymw = (HiveIntervalYearMonthWritable) object; - HiveIntervalYearMonth iym = iymw.getHiveIntervalYearMonth(); - vector[batchIndex] = iym.getTotalMonths(); - colVector.isNull[batchIndex] = false; - } - } - } - - private abstract class 
AbstractIntervalDayTimeAssigner extends Assigner { - - protected IntervalDayTimeColumnVector colVector; - - AbstractIntervalDayTimeAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (IntervalDayTimeColumnVector) batch.cols[columnIndex]; - } - - @Override - void forgetColumnVector() { - colVector = null; - } - } - - private class IntervalDayTimeAssigner extends AbstractIntervalDayTimeAssigner { - - IntervalDayTimeAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - HiveIntervalDayTimeWritable idtw = (HiveIntervalDayTimeWritable) object; - HiveIntervalDayTime idt = idtw.getHiveIntervalDayTime(); - colVector.set(batchIndex, idt); - colVector.isNull[batchIndex] = false; - } - } - } - - private abstract class AbstractDoubleAssigner extends Assigner { - - protected DoubleColumnVector colVector; - protected double[] vector; - - AbstractDoubleAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (DoubleColumnVector) batch.cols[columnIndex]; - vector = colVector.vector; - } - - @Override - void forgetColumnVector() { - colVector = null; - vector = null; - } - } - - private class FloatAssigner extends AbstractDoubleAssigner { - - FloatAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - FloatWritable fw = (FloatWritable) object; - vector[batchIndex] = fw.get(); - colVector.isNull[batchIndex] = false; - } - } - } - - private class DoubleAssigner extends AbstractDoubleAssigner { - - DoubleAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - DoubleWritable dw = (DoubleWritable) object; - vector[batchIndex] = dw.get(); - colVector.isNull[batchIndex] = false; - } - } - } - - private abstract class AbstractBytesAssigner extends Assigner { - - protected BytesColumnVector colVector; - - AbstractBytesAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (BytesColumnVector) batch.cols[columnIndex]; - } - - @Override - void forgetColumnVector() { - colVector = null; - } - } + private Assigner createConversionAssigner(TypeInfo sourceTypeInfo, + TypeInfo targetTypeInfo, int columnIndex) throws HiveException { - private class BinaryAssigner extends AbstractBytesAssigner { - - BinaryAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - BytesWritable bw = (BytesWritable) object; - colVector.setVal(batchIndex, bw.getBytes(), 0, bw.getLength()); - colVector.isNull[batchIndex] = false; - } - } - } - - private class StringAssigner extends AbstractBytesAssigner { - - StringAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - Text tw = (Text) object; - 
colVector.setVal(batchIndex, tw.getBytes(), 0, tw.getLength()); - colVector.isNull[batchIndex] = false; - } - } - } - - private class VarCharAssigner extends AbstractBytesAssigner { - - VarCharAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - // We store VARCHAR type stripped of pads. - HiveVarchar hiveVarchar; - if (object instanceof HiveVarchar) { - hiveVarchar = (HiveVarchar) object; - } else { - hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar(); - } - byte[] bytes = hiveVarchar.getValue().getBytes(); - colVector.setVal(batchIndex, bytes, 0, bytes.length); - colVector.isNull[batchIndex] = false; - } - } - } + PrimitiveObjectInspector sourcePrimitiveObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) sourceTypeInfo); - private class CharAssigner extends AbstractBytesAssigner { + PrimitiveTypeInfo targetPrimitiveTypeInfo = (PrimitiveTypeInfo) targetTypeInfo; + PrimitiveCategory targetPrimitiveCategory = + targetPrimitiveTypeInfo.getPrimitiveCategory(); - CharAssigner(int columnIndex) { - super(columnIndex); + Assigner assigner = null; + switch (targetPrimitiveCategory) { + case VOID: + assigner = new ConvertToVoidAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case BOOLEAN: + assigner = new ConvertToBooleanAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case BYTE: + assigner = new ConvertToByteAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case SHORT: + assigner = new ConvertToShortAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case INT: + assigner = new ConvertToIntAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case LONG: + assigner = new ConvertToLongAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case TIMESTAMP: + assigner = new ConvertToTimestampAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case DATE: + assigner = new ConvertToDateAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case FLOAT: + assigner = new ConvertToFloatAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case DOUBLE: + assigner = new ConvertToDoubleAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case BINARY: + assigner = new ConvertToBinaryAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case STRING: + assigner = new ConvertToStringAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case VARCHAR: + assigner = new ConvertToVarcharAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case CHAR: + assigner = new ConvertToCharAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case DECIMAL: + assigner = new ConvertToDecimalAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case INTERVAL_YEAR_MONTH: + assigner = new 
ConvertToIntervalYearMonthAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + case INTERVAL_DAY_TIME: + assigner = new ConvertToIntervalDayTimeAssigner(sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, columnIndex); + break; + default: + throw new HiveException("No vector row assigner for target primitive category " + + targetPrimitiveCategory); } - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - // We store CHAR type stripped of pads. - HiveChar hiveChar; - if (object instanceof HiveChar) { - hiveChar = (HiveChar) object; - } else { - hiveChar = ((HiveCharWritable) object).getHiveChar(); - } - - // We store CHAR in vector row batch with padding stripped. - byte[] bytes = hiveChar.getStrippedValue().getBytes(); - colVector.setVal(batchIndex, bytes, 0, bytes.length); - colVector.isNull[batchIndex] = false; - } - } + return assigner; } - private class DecimalAssigner extends Assigner { - - protected DecimalColumnVector colVector; - - DecimalAssigner(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (DecimalColumnVector) batch.cols[columnIndex]; - } - - @Override - void forgetColumnVector() { - colVector = null; - } - - @Override - void assign(int batchIndex, Object object) { - if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); - } else { - if (object instanceof HiveDecimal) { - colVector.set(batchIndex, (HiveDecimal) object); - } else { - colVector.set(batchIndex, (HiveDecimalWritable) object); - } - colVector.isNull[batchIndex] = false; - } - } - } + //------------------------------------------------------------------------------------------------ private Assigner createAssigner(PrimitiveTypeInfo primitiveTypeInfo, int columnIndex) throws HiveException { PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); @@ -587,11 +202,13 @@ private Assigner createAssigner(PrimitiveTypeInfo primitiveTypeInfo, int columnI } Assigner[] assigners; + PrimitiveTypeInfo[] primitiveTypeInfos; public void init(StructObjectInspector structObjectInspector, List projectedColumns) throws HiveException { List fields = structObjectInspector.getAllStructFieldRefs(); assigners = new Assigner[fields.size()]; + primitiveTypeInfos = new PrimitiveTypeInfo[fields.size()]; int i = 0; for (StructField field : fields) { @@ -600,48 +217,132 @@ public void init(StructObjectInspector structObjectInspector, List proj PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString( fieldInspector.getTypeName()); assigners[i] = createAssigner(primitiveTypeInfo, columnIndex); + primitiveTypeInfos[i] = primitiveTypeInfo; i++; } } - public void init(List typeNames) throws HiveException { + public void init(StructObjectInspector structObjectInspector) throws HiveException { - assigners = new Assigner[typeNames.size()]; + List fields = structObjectInspector.getAllStructFieldRefs(); + assigners = new Assigner[fields.size()]; + primitiveTypeInfos = new PrimitiveTypeInfo[fields.size()]; int i = 0; - for (String typeName : typeNames) { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + for (StructField field : fields) { + ObjectInspector fieldInspector = field.getFieldObjectInspector(); + PrimitiveTypeInfo primitiveTypeInfo = + 
(PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString( + fieldInspector.getTypeName()); assigners[i] = createAssigner(primitiveTypeInfo, i); + primitiveTypeInfos[i] = primitiveTypeInfo; i++; } } - protected void setBatch(VectorizedRowBatch batch) throws HiveException { - for (int i = 0; i < assigners.length; i++) { - Assigner assigner = assigners[i]; - int columnIndex = assigner.getColumnIndex(); - if (batch.cols[columnIndex] == null) { - throw new HiveException("Unexpected null vector column " + columnIndex); + PrimitiveTypeInfo[] sourcePrimitiveTypeInfos; + + public void init(TypeInfo[] sourceTypeInfos, TypeInfo[] targetTypeInfos, + boolean[] conversionFlags, boolean[] columnsToIncludeTruncated) throws HiveException { + + int columnCount = columnsToIncludeTruncated == null ? + sourceTypeInfos.length : columnsToIncludeTruncated.length; + + assigners = new Assigner[columnCount]; + primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + sourcePrimitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + + for (int i = 0; i < columnCount; i++) { + + Assigner assigner; + PrimitiveTypeInfo targetPrimitiveTypeInfo = null; + PrimitiveTypeInfo sourcePrimitiveTypeInfo = null; + + if (columnsToIncludeTruncated != null && !columnsToIncludeTruncated[i]) { + + // Field not included in query. + assigner = null; + + } else { + TypeInfo targetTypeInfo = targetTypeInfos[i]; + + if (targetTypeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) { + + // For now, we don't have an assigner for complex types... + assigner = null; + } else { + TypeInfo sourceTypeInfo = sourceTypeInfos[i]; + + targetPrimitiveTypeInfo = (PrimitiveTypeInfo) targetTypeInfo; + + if (conversionFlags != null && conversionFlags[i]) { + + sourcePrimitiveTypeInfo = (PrimitiveTypeInfo) sourceTypeInfo; + if (VectorPartitionConversion.isImplicitVectorColumnConversion(sourceTypeInfo, targetTypeInfo)) { + + // Do implicit conversion using the source type. + assigner = createAssigner(sourcePrimitiveTypeInfo, i); + } else { + + // Do formal conversion... 
+ assigner = createConversionAssigner(sourcePrimitiveTypeInfo, + targetPrimitiveTypeInfo, i); + } + } else { + assigner = createAssigner(targetPrimitiveTypeInfo, i); + } + } } - assigner.setColumnVector(batch); + + assigners[i] = assigner; + primitiveTypeInfos[i] = targetPrimitiveTypeInfo; + sourcePrimitiveTypeInfos[i] = sourcePrimitiveTypeInfo; } } - protected void forgetBatch() { - for (Assigner assigner : assigners) { - assigner.forgetColumnVector(); + public void init(List typeNames) throws HiveException { + + assigners = new Assigner[typeNames.size()]; + primitiveTypeInfos = new PrimitiveTypeInfo[typeNames.size()]; + + int i = 0; + for (String typeName : typeNames) { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + assigners[i] = createAssigner(primitiveTypeInfo, i); + primitiveTypeInfos[i] = primitiveTypeInfo; + i++; } } - public void assignRowColumn(int batchIndex, int logicalColumnIndex, Object object) { - assigners[logicalColumnIndex].assign(batchIndex, object); + public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex, + Object object) { + assigners[logicalColumnIndex].assign(batch, batchIndex, object); } - public void assignRow(int batchIndex, Object[] objects) { + public void assignRow(VectorizedRowBatch batch, int batchIndex, Object[] objects) { int i = 0; for (Assigner assigner : assigners) { - assigner.assign(batchIndex, objects[i++]); + if (assigner != null) { + assigner.assign(batch, batchIndex, objects[i]); + } + i++; } } + public void assignRow(VectorizedRowBatch batch, int batchIndex, Object object, + StructObjectInspector structObjectInspector) { + + /* Convert input row to standard objects. */ + List standardObjects = new ArrayList(); + ObjectInspectorUtils.copyToStandardObject(standardObjects, object, + structObjectInspector, ObjectInspectorCopyOption.WRITABLE); + + int minLength = Math.min(standardObjects.size(), assigners.length); + for (int i = 0; i < minLength; i++) { + Assigner assigner = assigners[i]; + if (assigner != null) { + assigner.assign(batch, batchIndex, standardObjects.get(i)); + } + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java deleted file mode 100644 index a696825..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import org.apache.hadoop.hive.ql.metadata.HiveException; - -/** - * This class assigns specified columns of a VectorizedRowBatch row from a Writable row Object[]. 
- *
- * The caller provides the hive type names and target column numbers in the order desired to
- * assign from the Writable row Object[].
- *
- * This class is for use when the batch being assigned may change each time before processOp
- * is called.
- */
-public class VectorAssignRowDynBatch extends VectorAssignRow {
-
- public void setBatchOnEntry(VectorizedRowBatch batch) throws HiveException {
- setBatch(batch);
- }
-
- public void forgetBatchOnExit() {
- forgetBatch();
- }
-}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java
deleted file mode 100644
index 8c7c2ad..0000000
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-
-/**
- * This class assigns specified columns of a VectorizedRowBatch row from a Writable row Object[].
- *
- * The caller provides the hive type names and target column numbers in the order desired to
- * assign from the Writable row Object[].
- *
- * This class is for use when the batch being assigned is always the same.
- */
-public class VectorAssignRowSameBatch extends VectorAssignRow {
-
- public void setOneBatch(VectorizedRowBatch batch) throws HiveException {
- setBatch(batch);
- }
-}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssigner.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssigner.java
new file mode 100644
index 0000000..a5e0ae1
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssigner.java
@@ -0,0 +1,984 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.sql.Date; +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class assigns specified columns of a row from a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * assign from the Writable row Object[]. + * + * This class is abstract to allow the subclasses to control batch reuse. + */ +public abstract class VectorAssigner { + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(VectorAssigner.class); + + public abstract static class Assigner { + protected int columnIndex; + + Assigner(int columnIndex) { + this.columnIndex = columnIndex; + } + + public int getColumnIndex() { + return columnIndex; + } + + abstract void assign(VectorizedRowBatch batch, int batchIndex, Object object); + } + + public static class VoidAssigner extends Assigner { + + VoidAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + // This is no-op, there is no column to assign to and the object is expected to be null. + assert (object == null); + } + } + + public abstract static class AbstractLongAssigner extends Assigner { + + AbstractLongAssigner(int columnIndex) { + super(columnIndex); + } + } + + public static class BooleanAssigner extends AbstractLongAssigner { + + BooleanAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + BooleanWritable bw = (BooleanWritable) object; + colVector.vector[batchIndex] = (bw.get() ? 
1 : 0); + colVector.isNull[batchIndex] = false; + } + } + } + + public static class ByteAssigner extends AbstractLongAssigner { + + ByteAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + ByteWritable bw = (ByteWritable) object; + colVector.vector[batchIndex] = bw.get(); + colVector.isNull[batchIndex] = false; + } + } + } + + public static class ShortAssigner extends AbstractLongAssigner { + + ShortAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + ShortWritable sw = (ShortWritable) object; + colVector.vector[batchIndex] = sw.get(); + colVector.isNull[batchIndex] = false; + } + } + } + + public static class IntAssigner extends AbstractLongAssigner { + + IntAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + IntWritable iw = (IntWritable) object; + colVector.vector[batchIndex] = iw.get(); + colVector.isNull[batchIndex] = false; + } + } + } + + public static class LongAssigner extends AbstractLongAssigner { + + LongAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + LongWritable lw = (LongWritable) object; + colVector.vector[batchIndex] = lw.get(); + colVector.isNull[batchIndex] = false; + } + } + } + + public static class DateAssigner extends AbstractLongAssigner { + + DateAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + DateWritable bw = (DateWritable) object; + colVector.vector[batchIndex] = bw.getDays(); + colVector.isNull[batchIndex] = false; + } + } + } + + public static class IntervalYearMonthAssigner extends AbstractLongAssigner { + + IntervalYearMonthAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + HiveIntervalYearMonthWritable iymw = (HiveIntervalYearMonthWritable) object; + HiveIntervalYearMonth iym = iymw.getHiveIntervalYearMonth(); + colVector.vector[batchIndex] = iym.getTotalMonths(); + colVector.isNull[batchIndex] = false; + } + } + } + + private abstract static class AbstractTimestampAssigner extends Assigner { + + AbstractTimestampAssigner(int columnIndex) { + super(columnIndex); + } + 
} + + public static class TimestampAssigner extends AbstractTimestampAssigner { + + TimestampAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + colVector.set(batchIndex, ((TimestampWritable) object).getTimestamp()); + colVector.isNull[batchIndex] = false; + } + } + } + + public static class IntervalDayTimeAssigner extends AbstractTimestampAssigner { + + IntervalDayTimeAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + IntervalDayTimeColumnVector colVector = (IntervalDayTimeColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + HiveIntervalDayTimeWritable idtw = (HiveIntervalDayTimeWritable) object; + HiveIntervalDayTime idt = idtw.getHiveIntervalDayTime(); + colVector.set(batchIndex, idt); + colVector.isNull[batchIndex] = false; + } + } + } + + public abstract static class AbstractDoubleAssigner extends Assigner { + + AbstractDoubleAssigner(int columnIndex) { + super(columnIndex); + } + } + + public static class FloatAssigner extends AbstractDoubleAssigner { + + FloatAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + FloatWritable fw = (FloatWritable) object; + colVector.vector[batchIndex] = fw.get(); + colVector.isNull[batchIndex] = false; + } + } + } + + public static class DoubleAssigner extends AbstractDoubleAssigner { + + DoubleAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + DoubleWritable dw = (DoubleWritable) object; + colVector.vector[batchIndex] = dw.get(); + colVector.isNull[batchIndex] = false; + } + } + } + + public abstract static class AbstractBytesAssigner extends Assigner { + + AbstractBytesAssigner(int columnIndex) { + super(columnIndex); + } + } + + public static class BinaryAssigner extends AbstractBytesAssigner { + + BinaryAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + BytesWritable bw = (BytesWritable) object; + colVector.setVal(batchIndex, bw.getBytes(), 0, bw.getLength()); + colVector.isNull[batchIndex] = false; + } + } + } + + public static class StringAssigner extends AbstractBytesAssigner { + + StringAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, 
batchIndex); + } else { + Text tw = (Text) object; + colVector.setVal(batchIndex, tw.getBytes(), 0, tw.getLength()); + colVector.isNull[batchIndex] = false; + } + } + } + + public static class VarCharAssigner extends AbstractBytesAssigner { + + VarCharAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // We store VARCHAR type stripped of pads. + HiveVarchar hiveVarchar; + if (object instanceof HiveVarchar) { + hiveVarchar = (HiveVarchar) object; + } else { + hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar(); + } + byte[] bytes = hiveVarchar.getValue().getBytes(); + colVector.setVal(batchIndex, bytes, 0, bytes.length); + colVector.isNull[batchIndex] = false; + } + } + } + + public static class CharAssigner extends AbstractBytesAssigner { + + CharAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // We store CHAR type stripped of pads. + HiveChar hiveChar; + if (object instanceof HiveChar) { + hiveChar = (HiveChar) object; + } else { + hiveChar = ((HiveCharWritable) object).getHiveChar(); + } + + // We store CHAR in vector row batch with padding stripped. + byte[] bytes = hiveChar.getStrippedValue().getBytes(); + colVector.setVal(batchIndex, bytes, 0, bytes.length); + colVector.isNull[batchIndex] = false; + } + } + } + + public abstract static class AbstractDecimalAssigner extends Assigner { + + AbstractDecimalAssigner(int columnIndex) { + super(columnIndex); + } + } + + public static class DecimalAssigner extends AbstractDecimalAssigner { + + DecimalAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + DecimalColumnVector colVector = (DecimalColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + if (object instanceof HiveDecimal) { + colVector.set(batchIndex, (HiveDecimal) object); + } else { + colVector.set(batchIndex, (HiveDecimalWritable) object); + } + colVector.isNull[batchIndex] = false; + } + } + } + + //------------------------------------------------------------------------------------------------ + + public static class ConvertToVoidAssigner extends Assigner { + + ConvertToVoidAssigner(TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + // No-op. 
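[Editor's note] All of the writable-backed assigners above share one write-or-mark-null pattern: for a non-null source object the target slot of the ColumnVector is written and its isNull flag cleared; for a null the slot is recorded through VectorizedBatchUtil.setNullColIsNullValue, which sets isNull for that row and clears the vector's noNulls flag. The following stand-alone sketch of that pattern for a long column is an illustration only, not part of the patch (the class and method names are hypothetical):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    final class LongAssignSketch {
      // Writes value into column columnIndex at row batchIndex, or marks the slot null.
      static void assignLong(VectorizedRowBatch batch, int columnIndex, int batchIndex, Long value) {
        LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
        if (value == null) {
          // Same effect as VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex).
          colVector.noNulls = false;
          colVector.isNull[batchIndex] = true;
        } else {
          colVector.vector[batchIndex] = value.longValue();
          colVector.isNull[batchIndex] = false;
        }
      }
    }

The CHAR and VARCHAR assigners above apply the same pattern to a BytesColumnVector, storing the value with padding stripped, as their comments note.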
+ } + } + + public abstract static class AbstractConvertToLongAssigner extends AbstractLongAssigner { + + protected final PrimitiveObjectInspector sourcePrimitiveObjectInspector; + + AbstractConvertToLongAssigner(PrimitiveObjectInspector sourcePrimitiveObjectInspector, + int columnIndex) { + super(columnIndex); + this.sourcePrimitiveObjectInspector = sourcePrimitiveObjectInspector; + } + } + + public static class ConvertToBooleanAssigner extends AbstractConvertToLongAssigner { + + ConvertToBooleanAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + colVector.vector[batchIndex] = + (PrimitiveObjectInspectorUtils.getBoolean( + object, sourcePrimitiveObjectInspector) ? 1 : 0); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToByteAssigner extends AbstractConvertToLongAssigner { + + ConvertToByteAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + colVector.vector[batchIndex] = + PrimitiveObjectInspectorUtils.getByte( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToShortAssigner extends AbstractConvertToLongAssigner { + + ConvertToShortAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + colVector.vector[batchIndex] = + PrimitiveObjectInspectorUtils.getShort( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToIntAssigner extends AbstractConvertToLongAssigner { + + ConvertToIntAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + 
colVector.vector[batchIndex] = + PrimitiveObjectInspectorUtils.getInt( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToLongAssigner extends AbstractConvertToLongAssigner { + + ConvertToLongAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + colVector.vector[batchIndex] = + PrimitiveObjectInspectorUtils.getLong( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToDateAssigner extends AbstractConvertToLongAssigner { + + DateWritable dateWritable; + + ConvertToDateAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + dateWritable = new DateWritable(0); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + Date date = PrimitiveObjectInspectorUtils.getDate( + object, sourcePrimitiveObjectInspector); + if (date == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + dateWritable.set(date); + colVector.vector[batchIndex] = dateWritable.getDays(); + } + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToTimestampAssigner extends AbstractConvertToLongAssigner { + + ConvertToTimestampAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + Timestamp timestamp = + PrimitiveObjectInspectorUtils.getTimestamp( + object, sourcePrimitiveObjectInspector); + if (timestamp == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + colVector.set(batchIndex, timestamp); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToIntervalYearMonthAssigner extends AbstractConvertToLongAssigner { + + ConvertToIntervalYearMonthAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, 
columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + HiveIntervalYearMonth intervalYearMonth = + PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth( + object, sourcePrimitiveObjectInspector); + if (intervalYearMonth == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + colVector.vector[batchIndex] = intervalYearMonth.getTotalMonths(); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToIntervalDayTimeAssigner extends AbstractConvertToLongAssigner { + + ConvertToIntervalDayTimeAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + IntervalDayTimeColumnVector colVector = (IntervalDayTimeColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + HiveIntervalDayTime intervalDayTime = + PrimitiveObjectInspectorUtils.getHiveIntervalDayTime( + object, sourcePrimitiveObjectInspector); + if (intervalDayTime == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + colVector.set(batchIndex, intervalDayTime); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public abstract static class AbstractConvertToDoubleAssigner extends AbstractDoubleAssigner { + + protected final PrimitiveObjectInspector sourcePrimitiveObjectInspector; + + AbstractConvertToDoubleAssigner(PrimitiveObjectInspector sourcePrimitiveObjectInspector, + int columnIndex) { + super(columnIndex); + this.sourcePrimitiveObjectInspector = sourcePrimitiveObjectInspector; + } + } + + public static class ConvertToFloatAssigner extends AbstractConvertToDoubleAssigner { + + ConvertToFloatAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + colVector.vector[batchIndex] = + PrimitiveObjectInspectorUtils.getFloat( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToDoubleAssigner extends AbstractConvertToDoubleAssigner { + + ConvertToDoubleAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + DoubleColumnVector colVector = 
(DoubleColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + colVector.vector[batchIndex] = + PrimitiveObjectInspectorUtils.getDouble( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public abstract static class AbstractConvertToBytesAssigner extends AbstractBytesAssigner { + + protected final PrimitiveObjectInspector sourcePrimitiveObjectInspector; + + AbstractConvertToBytesAssigner(PrimitiveObjectInspector sourcePrimitiveObjectInspector, + int columnIndex) { + super(columnIndex); + this.sourcePrimitiveObjectInspector = sourcePrimitiveObjectInspector; + } + } + + public static class ConvertToBinaryAssigner extends AbstractConvertToBytesAssigner { + + ConvertToBinaryAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + BytesWritable bytesWritable = + PrimitiveObjectInspectorUtils.getBinary( + object, sourcePrimitiveObjectInspector); + if (bytesWritable == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + colVector.setVal(batchIndex, bytesWritable.getBytes(), 0, bytesWritable.getLength()); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToStringAssigner extends AbstractConvertToBytesAssigner { + + private final Text text; + + ConvertToStringAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + text = new Text(); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + String string = PrimitiveObjectInspectorUtils.getString( + object, sourcePrimitiveObjectInspector); + if (string == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + text.set(string); + colVector.setVal(batchIndex, text.getBytes(), 0, text.getLength()); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToVarcharAssigner extends AbstractConvertToBytesAssigner { + + ConvertToVarcharAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + 
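[Editor's note] Unlike the plain assigners, the ConvertTo*Assigner variants do not assume the source writable already matches the target column type: they go through PrimitiveObjectInspectorUtils with the source object inspector and treat a NumberFormatException as a NULL result. A self-contained sketch of that conversion step, converting string-typed sources into a long column (editor's illustration, not taken from the patch):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

    final class ConvertToLongSketch {
      static void assignConverted(VectorizedRowBatch batch, int columnIndex, int batchIndex, Object source) {
        // Assumed source type: string writables; the OI tells the utils how to read them.
        PrimitiveObjectInspector sourceOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
        LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
        if (source == null) {
          colVector.noNulls = false;
          colVector.isNull[batchIndex] = true;
          return;
        }
        try {
          colVector.vector[batchIndex] = PrimitiveObjectInspectorUtils.getLong(source, sourceOI);
          colVector.isNull[batchIndex] = false;
        } catch (NumberFormatException e) {
          // Unparseable input becomes NULL, mirroring the converting assigners above.
          colVector.noNulls = false;
          colVector.isNull[batchIndex] = true;
        }
      }
    }

This is why each converting assigner carries the source PrimitiveObjectInspector: the conversion is driven by the declared source type, not by the runtime class of the object alone.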
try { + HiveVarchar hiveVarchar = + PrimitiveObjectInspectorUtils.getHiveVarchar( + object, sourcePrimitiveObjectInspector); + if (hiveVarchar == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + byte[] bytes = hiveVarchar.getValue().getBytes(); + + // UNDONE: Trim to target maximum length. + + colVector.setVal(batchIndex, bytes, 0, bytes.length); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public static class ConvertToCharAssigner extends AbstractConvertToBytesAssigner { + + ConvertToCharAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + HiveChar hiveChar = + PrimitiveObjectInspectorUtils.getHiveChar( + object, sourcePrimitiveObjectInspector); + if (hiveChar == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + byte[] bytes = hiveChar.getValue().getBytes(); + + // UNDONE: Trim to target maximum length. + + colVector.setVal(batchIndex, bytes, 0, bytes.length); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + public abstract static class AbstractConvertToDecimalAssigner extends AbstractDecimalAssigner { + + protected final PrimitiveObjectInspector sourcePrimitiveObjectInspector; + + AbstractConvertToDecimalAssigner(PrimitiveObjectInspector sourcePrimitiveObjectInspector, + int columnIndex) { + super(columnIndex); + this.sourcePrimitiveObjectInspector = sourcePrimitiveObjectInspector; + } + } + + public static class ConvertToDecimalAssigner extends AbstractConvertToDecimalAssigner { + + ConvertToDecimalAssigner(TypeInfo sourceTypeInfo, + PrimitiveObjectInspector sourcePrimitiveObjectInspector, TypeInfo targetTypeInfo, + int columnIndex) { + super(sourcePrimitiveObjectInspector, columnIndex); + } + + @Override + void assign(VectorizedRowBatch batch, int batchIndex, Object object) { + DecimalColumnVector colVector = (DecimalColumnVector) batch.cols[columnIndex]; + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + HiveDecimal hiveDecimal = + PrimitiveObjectInspectorUtils.getHiveDecimal( + object, sourcePrimitiveObjectInspector); + if (hiveDecimal == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + colVector.set(batchIndex, hiveDecimal); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index 3eadc12..8a0289f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -20,26 +20,75 @@ import java.io.EOFException; import java.io.IOException; +import 
java.sql.Date; +import java.sql.Timestamp; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToBinaryAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToBooleanAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToByteAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToCharAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToDateAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToDecimalAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToDoubleAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToFloatAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToIntAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToIntervalDayTimeAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToIntervalYearMonthAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToLongAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToShortAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToStringAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToTimestampAssigner; +import org.apache.hadoop.hive.ql.exec.vector.VectorAssigner.ConvertToVarcharAssigner; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion; import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadBinaryResults; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadDateResults; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadDecimalResults; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadIntervalDayTimeResults; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadIntervalYearMonthResults; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadStringResults; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadTimestampResults; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDoubleObjectInspector; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveIntervalDayTimeObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveIntervalYearMonthObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.Text; import org.apache.hive.common.util.DateUtils; /** * This class deserializes a serialization format into a row of a VectorizedRowBatch. - * + * * The caller provides the hive type names and output column numbers in the order desired to * deserialize. * @@ -54,46 +103,47 @@ private T deserializeRead; - private Reader[] readersByValue; - private Reader[] readersByReference; + private Reader[] readersByValue; + private Reader[] readersByReference; private TypeInfo[] typeInfos; public VectorDeserializeRow(T deserializeRead) { this(); this.deserializeRead = deserializeRead; typeInfos = deserializeRead.typeInfos(); - } // Not public since we must have the deserialize read object. 
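[Editor's note] Every Reader in this class follows the same contract: call deserializeRead.readCheckNull() first, and only read the typed value into the column vector when the field is present. A compact, self-contained illustration of that contract for a long field follows; it is an editor's sketch, and the helper class and method are hypothetical rather than part of VectorDeserializeRow:

    import java.io.IOException;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hadoop.hive.serde2.fast.DeserializeRead;

    final class LongFieldReadSketch {
      // Reads the next serialized field as a long into column columnIndex, row batchIndex.
      static void readLongField(DeserializeRead deserializeRead, VectorizedRowBatch batch,
          int columnIndex, int batchIndex) throws IOException {
        LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
        if (deserializeRead.readCheckNull()) {
          // NULL field: mark the slot and remember the column now contains nulls.
          colVector.noNulls = false;
          colVector.isNull[batchIndex] = true;
        } else {
          colVector.vector[batchIndex] = deserializeRead.readLong();
          colVector.isNull[batchIndex] = false;
        }
      }
    }

The per-type Reader classes below are generated once per output column and then applied row by row, so the type dispatch happens at setup time rather than per value.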
private VectorDeserializeRow() { } - private abstract class Reader { + private interface Reader { + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException; + } + + private abstract class AbstractReader implements Reader { protected int columnIndex; - Reader(int columnIndex) { + AbstractReader(int columnIndex) { this.columnIndex = columnIndex; } - - abstract void apply(VectorizedRowBatch batch, int batchIndex) throws IOException; } - private abstract class AbstractLongReader extends Reader { + private abstract class AbstractLongReader extends AbstractReader { - AbstractLongReader(int columnIndex) { + AbstractLongReader(TypeInfo typeInfo, int columnIndex) { super(columnIndex); } } private class BooleanReader extends AbstractLongReader { - BooleanReader(int columnIndex) { - super(columnIndex); + BooleanReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -108,12 +158,12 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private class ByteReader extends AbstractLongReader { - ByteReader(int columnIndex) { - super(columnIndex); + ByteReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -128,12 +178,12 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private class ShortReader extends AbstractLongReader { - ShortReader(int columnIndex) { - super(columnIndex); + ShortReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -148,12 +198,12 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private class IntReader extends AbstractLongReader { - IntReader(int columnIndex) { - super(columnIndex); + IntReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -168,12 +218,12 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private class LongReader extends AbstractLongReader { - LongReader(int columnIndex) { - super(columnIndex); + LongReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -190,13 +240,13 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws 
IOException { DeserializeRead.ReadDateResults readDateResults; - DateReader(int columnIndex) { - super(columnIndex); + DateReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readDateResults = deserializeRead.createReadDateResults(); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -209,62 +259,64 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { } } - private abstract class AbstractTimestampReader extends Reader { - AbstractTimestampReader(int columnIndex) { - super(columnIndex); - } - } - private class TimestampReader extends AbstractTimestampReader { + private class IntervalYearMonthReader extends AbstractLongReader { - DeserializeRead.ReadTimestampResults readTimestampResults; + DeserializeRead.ReadIntervalYearMonthResults readIntervalYearMonthResults; - TimestampReader(int columnIndex) { - super(columnIndex); - readTimestampResults = deserializeRead.createReadTimestampResults(); + IntervalYearMonthReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); + readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { - TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex]; + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); } else { - deserializeRead.readTimestamp(readTimestampResults); - colVector.set(batchIndex, readTimestampResults.getTimestamp()); + deserializeRead.readIntervalYearMonth(readIntervalYearMonthResults); + HiveIntervalYearMonth hiym = readIntervalYearMonthResults.getHiveIntervalYearMonth(); + colVector.vector[batchIndex] = hiym.getTotalMonths(); colVector.isNull[batchIndex] = false; } } + } + + private abstract class AbstractTimestampReader extends AbstractReader { + AbstractTimestampReader(int columnIndex) { + super(columnIndex); + } } - private class IntervalYearMonthReader extends AbstractLongReader { + private class TimestampReader extends AbstractTimestampReader { - DeserializeRead.ReadIntervalYearMonthResults readIntervalYearMonthResults; + DeserializeRead.ReadTimestampResults readTimestampResults; - IntervalYearMonthReader(int columnIndex) { + TimestampReader(TypeInfo typeInfo, int columnIndex) { super(columnIndex); - readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); + readTimestampResults = deserializeRead.createReadTimestampResults(); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { - LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); } else { - deserializeRead.readIntervalYearMonth(readIntervalYearMonthResults); - HiveIntervalYearMonth hiym = readIntervalYearMonthResults.getHiveIntervalYearMonth(); - colVector.vector[batchIndex] = 
hiym.getTotalMonths(); + deserializeRead.readTimestamp(readTimestampResults); + colVector.set(batchIndex, readTimestampResults.getTimestamp()); colVector.isNull[batchIndex] = false; } } + } - private abstract class AbstractIntervalDayTimeReader extends Reader { + private abstract class AbstractIntervalDayTimeReader extends AbstractReader { AbstractIntervalDayTimeReader(int columnIndex) { super(columnIndex); @@ -275,13 +327,13 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { DeserializeRead.ReadIntervalDayTimeResults readIntervalDayTimeResults; - IntervalDayTimeReader(int columnIndex) { + IntervalDayTimeReader(TypeInfo typeInfo, int columnIndex) { super(columnIndex); readIntervalDayTimeResults = deserializeRead.createReadIntervalDayTimeResults(); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { IntervalDayTimeColumnVector colVector = (IntervalDayTimeColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -295,21 +347,21 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { } } - private abstract class AbstractDoubleReader extends Reader { + private abstract class AbstractDoubleReader extends AbstractReader { - AbstractDoubleReader(int columnIndex) { + AbstractDoubleReader(TypeInfo typeInfo, int columnIndex) { super(columnIndex); } } private class FloatReader extends AbstractDoubleReader { - FloatReader(int columnIndex) { - super(columnIndex); + FloatReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -324,12 +376,12 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private class DoubleReader extends AbstractDoubleReader { - DoubleReader(int columnIndex) { - super(columnIndex); + DoubleReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -342,9 +394,9 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { } } - private abstract class AbstractBytesReader extends Reader { + private abstract class AbstractBytesReader extends AbstractReader { - AbstractBytesReader(int columnIndex) { + AbstractBytesReader(TypeInfo typeInfo, int columnIndex) { super(columnIndex); } } @@ -353,13 +405,13 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private DeserializeRead.ReadStringResults readStringResults; - StringReaderByValue(int columnIndex) { - super(columnIndex); + StringReaderByValue(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -377,13 +429,13 @@ 
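[Editor's note] As the ByValue/ByReference names suggest, the by-value string readers copy the field's bytes into the BytesColumnVector's own buffer, while the by-reference readers point the column at the deserializer's backing byte array, which is only safe when that buffer outlives the batch. An editor's sketch of the two BytesColumnVector calls involved (not taken from the patch):

    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;

    final class BytesAssignSketch {
      // Copies bytes[start..start+length) into the vector's internal buffer.
      static void byValue(BytesColumnVector col, int batchIndex, byte[] bytes, int start, int length) {
        col.setVal(batchIndex, bytes, start, length);
        col.isNull[batchIndex] = false;
      }

      // Points the row at the caller's buffer without copying; that buffer must stay
      // valid and unmodified until the batch has been fully processed.
      static void byReference(BytesColumnVector col, int batchIndex, byte[] bytes, int start, int length) {
        col.setRef(batchIndex, bytes, start, length);
        col.isNull[batchIndex] = false;
      }
    }

The readersByValue and readersByReference arrays declared earlier in the class let the caller pick whichever trade-off (copy cost versus buffer lifetime) fits its usage.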
void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private DeserializeRead.ReadStringResults readStringResults; - StringReaderByReference(int columnIndex) { - super(columnIndex); + StringReaderByReference(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -403,14 +455,14 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private CharTypeInfo charTypeInfo; - CharReaderByValue(CharTypeInfo charTypeInfo, int columnIndex) { - super(columnIndex); + CharReaderByValue(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); - this.charTypeInfo = charTypeInfo; + this.charTypeInfo = (CharTypeInfo) typeInfo; } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -433,14 +485,14 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private CharTypeInfo charTypeInfo; - CharReaderByReference(CharTypeInfo charTypeInfo, int columnIndex) { - super(columnIndex); + CharReaderByReference(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); - this.charTypeInfo = charTypeInfo; + this.charTypeInfo = (CharTypeInfo) typeInfo; } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -463,14 +515,14 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private VarcharTypeInfo varcharTypeInfo; - VarcharReaderByValue(VarcharTypeInfo varcharTypeInfo, int columnIndex) { - super(columnIndex); + VarcharReaderByValue(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); - this.varcharTypeInfo = varcharTypeInfo; + this.varcharTypeInfo = (VarcharTypeInfo) typeInfo; } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -493,14 +545,14 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private VarcharTypeInfo varcharTypeInfo; - VarcharReaderByReference(VarcharTypeInfo varcharTypeInfo, int columnIndex) { - super(columnIndex); + VarcharReaderByReference(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); - this.varcharTypeInfo = varcharTypeInfo; + this.varcharTypeInfo = (VarcharTypeInfo) typeInfo; } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws 
IOException { BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -521,13 +573,13 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private DeserializeRead.ReadBinaryResults readBinaryResults; - BinaryReaderByValue(int columnIndex) { - super(columnIndex); + BinaryReaderByValue(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readBinaryResults = deserializeRead.createReadBinaryResults(); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -545,13 +597,13 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private DeserializeRead.ReadBinaryResults readBinaryResults; - BinaryReaderByReference(int columnIndex) { - super(columnIndex); + BinaryReaderByReference(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readBinaryResults = deserializeRead.createReadBinaryResults(); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -565,17 +617,17 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { } } - private class HiveDecimalReader extends Reader { + private class HiveDecimalReader extends AbstractReader { private DeserializeRead.ReadDecimalResults readDecimalResults; - HiveDecimalReader(int columnIndex) { + HiveDecimalReader(TypeInfo typeInfo, int columnIndex) { super(columnIndex); readDecimalResults = deserializeRead.createReadDecimalResults(); } @Override - void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { DecimalColumnVector colVector = (DecimalColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { @@ -589,73 +641,672 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { } } - private void addReader(int index, int outputColumn) throws HiveException { - Reader readerByValue = null; - Reader readerByReference = null; + private class NotIncludedColumnReader extends AbstractReader { + + NotIncludedColumnReader(TypeInfo typeInfo, int columnIndex) { + super(columnIndex); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfos[index]; - PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + if (deserializeRead.readCheckNull()) { + // Ignore not included column. 
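[Editor's note] NotIncludedColumnReader still has to consume the field so the remaining columns stay aligned with the serialized field order, but this patch only supports the case where a not-included column arrives as NULL. A tiny self-contained restatement of that contract, using DeserializeRead directly (editor's sketch; the helper is hypothetical):

    import java.io.IOException;
    import org.apache.hadoop.hive.serde2.fast.DeserializeRead;

    final class SkipFieldSketch {
      // Consumes the next serialized field without materializing it. Only the NULL
      // case is supported for non-included columns, so anything else is an error.
      static void skipNotIncludedField(DeserializeRead deserializeRead) throws IOException {
        if (!deserializeRead.readCheckNull()) {
          throw new IllegalStateException("Expected a NULL for a column that is not included");
        }
      }
    }

A non-NULL value here presumably indicates a mismatch in the caller's included-column bookkeeping, which is why the reader fails hard instead of silently skipping the field.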
+ } else { + throw new RuntimeException("Expected a NULL for not included column"); + } + } + } + + + //------------------------------------------------------------------------------------------------ + + private static class ObjectReader { + + private final DeserializeRead deserializeRead; + + private final TypeInfo sourceTypeInfo; + private final ObjectInspector.Category sourceCategory; + + private final ObjectInspector objectInspector; + private Object object; + + private ReadStringResults readStringResults; + private ReadDateResults readDateResults; + private ReadTimestampResults readTimestampResults; + private ReadBinaryResults readBinaryResults; + private ReadDecimalResults readDecimalResults; + private ReadIntervalYearMonthResults readIntervalYearMonthResults; + private ReadIntervalDayTimeResults readIntervalDayTimeResults; + + ObjectReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector) { + this.deserializeRead = deserializeRead; + this.sourceTypeInfo = sourceTypeInfo; + this.objectInspector = sourceObjectInspector; + sourceCategory = sourceTypeInfo.getCategory(); + switch (sourceCategory) { + case PRIMITIVE: + { + PrimitiveTypeInfo sourcePrimitiveTypeInfo = (PrimitiveTypeInfo) sourceTypeInfo; + PrimitiveCategory sourcePrimitiveCategory = sourcePrimitiveTypeInfo.getPrimitiveCategory(); + switch (sourcePrimitiveCategory) { + case BOOLEAN: + object = ((SettableBooleanObjectInspector) objectInspector).create(false); + break; + case BYTE: + object = ((SettableByteObjectInspector) objectInspector).create((byte) 0); + break; + case SHORT: + object = ((SettableShortObjectInspector) objectInspector).create((short) 0); + break; + case INT: + object = ((SettableIntObjectInspector) objectInspector).create(0); + break; + case LONG: + object = ((SettableLongObjectInspector) objectInspector).create(0); + break; + case FLOAT: + object = ((SettableFloatObjectInspector) objectInspector).create(0); + break; + case DOUBLE: + object = ((SettableDoubleObjectInspector) objectInspector).create(0); + break; + case STRING: + readStringResults = deserializeRead.createReadStringResults(); + object = ((SettableStringObjectInspector) objectInspector).create(new Text()); + break; + case DATE: + readDateResults = deserializeRead.createReadDateResults(); + object = ((SettableDateObjectInspector) objectInspector).create(new Date(0)); + break; + case TIMESTAMP: + readTimestampResults = deserializeRead.createReadTimestampResults(); + object = ((SettableTimestampObjectInspector) objectInspector).create(new Timestamp(0)); + break; + case BINARY: + readBinaryResults = deserializeRead.createReadBinaryResults(); + object = ((SettableBinaryObjectInspector) objectInspector).create(ArrayUtils.EMPTY_BYTE_ARRAY); + break; + case DECIMAL: + readDecimalResults = deserializeRead.createReadDecimalResults(); + object = ((SettableHiveDecimalObjectInspector) objectInspector).create(HiveDecimal.ZERO); + break; + case VARCHAR: + readStringResults = deserializeRead.createReadStringResults(); + object = ((SettableHiveVarcharObjectInspector) objectInspector).create(new HiveVarchar()); + break; + case CHAR: + readStringResults = deserializeRead.createReadStringResults(); + object = ((SettableHiveCharObjectInspector) objectInspector).create(new HiveChar()); + break; + case INTERVAL_YEAR_MONTH: + readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); + object = ((SettableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth()); 
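[Editor's note] ObjectReader pre-creates one reusable standard object per column in its constructor via the settable object inspector's create(...), and then refreshes it with set(...) on every read(), so the same object can be reused across rows for fixed-size types. A minimal stand-alone illustration of that create/set pattern for a long value (editor's sketch, not part of the patch):

    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector;

    final class SettableOiSketch {
      public static void main(String[] args) {
        SettableLongObjectInspector oi =
            PrimitiveObjectInspectorFactory.writableLongObjectInspector;
        // Created once, up front (the constructor phase of ObjectReader).
        Object reusable = oi.create(0L);
        // Refreshed per row (the read() phase); set() returns the updated object.
        for (long value : new long[] {1L, 2L, 3L}) {
          reusable = oi.set(reusable, value);
          System.out.println(oi.get(reusable));
        }
      }
    }

The materialized object is then handed to the corresponding ConvertTo*Assigner, which performs the actual type conversion into the column vector.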
+ break; + case INTERVAL_DAY_TIME: + readIntervalDayTimeResults = deserializeRead.createReadIntervalDayTimeResults(); + object = ((SettableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime()); + break; + default: + throw new RuntimeException("Unexpected primitive category " + sourcePrimitiveCategory.name()); + } + } + break; + default: + throw new RuntimeException("Unexpected category " + sourceCategory.name()); + } + } + + public Object read() throws IOException { + Object sourceObject; + if (deserializeRead.readCheckNull()) { + sourceObject = null; + } else { + switch (sourceCategory) { + case PRIMITIVE: + { + PrimitiveTypeInfo sourcePrimitiveTypeInfo = (PrimitiveTypeInfo) sourceTypeInfo; + PrimitiveCategory sourcePrimitiveCategory = sourcePrimitiveTypeInfo.getPrimitiveCategory(); + switch (sourcePrimitiveCategory) { + case BOOLEAN: + object = ((SettableBooleanObjectInspector) objectInspector).set( + object, deserializeRead.readBoolean()); + break; + case BYTE: + object = ((SettableByteObjectInspector) objectInspector).set( + object, deserializeRead.readByte()); + break; + case SHORT: + object = ((SettableShortObjectInspector) objectInspector).set( + object, deserializeRead.readShort()); + break; + case INT: + object = ((SettableIntObjectInspector) objectInspector).set( + object, deserializeRead.readInt()); + break; + case LONG: + object = ((SettableLongObjectInspector) objectInspector).set( + object, deserializeRead.readLong()); + break; + case FLOAT: + object = ((SettableFloatObjectInspector) objectInspector).set( + object, deserializeRead.readFloat()); + break; + case DOUBLE: + object = ((SettableDoubleObjectInspector) objectInspector).set( + object, deserializeRead.readDouble()); + break; + case STRING: + deserializeRead.readString(readStringResults); + ((Text) object).set(readStringResults.bytes, readStringResults.start, readStringResults.length); + break; + case DATE: + deserializeRead.readDate(readDateResults); + object = ((SettableDateObjectInspector) objectInspector).set( + object, readDateResults.getDate()); + break; + case TIMESTAMP: + deserializeRead.readTimestamp(readTimestampResults); + object = ((SettableTimestampObjectInspector) objectInspector).set( + object, readTimestampResults.getTimestamp()); + break; + case BINARY: + deserializeRead.readBinary(readBinaryResults); + object = ((SettableBinaryObjectInspector) objectInspector).create(ArrayUtils.EMPTY_BYTE_ARRAY); + break; + case DECIMAL: + deserializeRead.readHiveDecimal(readDecimalResults); + object = ((SettableHiveDecimalObjectInspector) objectInspector).set( + object, readDecimalResults.getHiveDecimal()); + break; + case VARCHAR: + deserializeRead.readString(readStringResults); + // UNDONE: Truncation? + object = ((SettableHiveVarcharObjectInspector) objectInspector).create(new HiveVarchar()); + break; + case CHAR: + deserializeRead.readString(readStringResults); + // UNDONE: Truncation? 
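[Editor's note] Both the VARCHAR and CHAR branches above read the raw string but leave enforcement of the declared maximum length open (the UNDONE comments). Purely as an illustration of what that enforcement involves, and assuming the target VarcharTypeInfo/CharTypeInfo length is available, the length-taking HiveVarchar and HiveChar constructors already truncate (and, for CHAR, pad) the value; the helpers below are hypothetical and not part of the patch:

    import org.apache.hadoop.hive.common.type.HiveChar;
    import org.apache.hadoop.hive.common.type.HiveVarchar;

    final class CharLengthSketch {
      // HiveVarchar(String, int) truncates the value to maxLength.
      static HiveVarchar toVarchar(String raw, int maxLength) {
        return new HiveVarchar(raw, maxLength);
      }

      // HiveChar(String, int) truncates and pads the value to maxLength.
      static HiveChar toChar(String raw, int maxLength) {
        return new HiveChar(raw, maxLength);
      }
    }

The CharAssigner and VarCharAssigner earlier in the patch then strip padding again before storing the bytes into the BytesColumnVector, as their comments state.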
+ object = ((SettableHiveCharObjectInspector) objectInspector).create(new HiveChar()); + break; + case INTERVAL_YEAR_MONTH: + deserializeRead.readIntervalYearMonth(readIntervalYearMonthResults); + object = ((SettableHiveIntervalYearMonthObjectInspector) objectInspector).set( + object, readIntervalYearMonthResults.getHiveIntervalYearMonth()); + break; + case INTERVAL_DAY_TIME: + deserializeRead.readIntervalDayTime(readIntervalDayTimeResults); + object = ((SettableHiveIntervalDayTimeObjectInspector) objectInspector).set( + object, readIntervalDayTimeResults.getHiveIntervalDayTime()); + break; + default: + throw new RuntimeException("Unexpected primitive category " + sourcePrimitiveCategory.name()); + } + } + break; + default: + throw new RuntimeException("Unexpected category " + sourceCategory.name()); + } + sourceObject = object; + } + return sourceObject; + } + } + + public static class ConvertToBooleanReader extends ConvertToBooleanAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToBooleanReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToByteReader extends ConvertToByteAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToByteReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToShortReader extends ConvertToShortAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToShortReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToIntReader extends ConvertToIntAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToIntReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public 
static class ConvertToLongReader extends ConvertToLongAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToLongReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToDateReader extends ConvertToDateAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToDateReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToTimestampReader extends ConvertToTimestampAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToTimestampReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToIntervalYearMonthReader extends ConvertToIntervalYearMonthAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToIntervalYearMonthReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToIntervalDayTimeReader extends ConvertToIntervalDayTimeAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToIntervalDayTimeReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToFloatReader extends ConvertToFloatAssigner + implements Reader { + + private final ObjectReader objectReader; + + 
ConvertToFloatReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToDoubleReader extends ConvertToDoubleAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToDoubleReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToStringReader extends ConvertToStringAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToStringReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToCharReader extends ConvertToCharAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToCharReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToVarcharReader extends ConvertToVarcharAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToVarcharReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToBinaryReader extends ConvertToBinaryAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToBinaryReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, 
(PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + public static class ConvertToHiveDecimalReader extends ConvertToDecimalAssigner + implements Reader { + + private final ObjectReader objectReader; + + ConvertToHiveDecimalReader(DeserializeRead deserializeRead, TypeInfo sourceTypeInfo, + ObjectInspector sourceObjectInspector, TypeInfo targetTypeInfo, int columnIndex) { + super(sourceTypeInfo, (PrimitiveObjectInspector) sourceObjectInspector, targetTypeInfo, + columnIndex); + objectReader = new ObjectReader(deserializeRead, sourceTypeInfo, sourceObjectInspector); + } + + @Override + public void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + assign(batch, batchIndex, objectReader.read()); + } + } + + // Currently, we only support these no-precision-loss or promotion data type conversions: + // + // And, all of them stay within the vector column type (Long, Double, Bytes, Decimal) + // for now. + // + // Short -> Int IMPLICIT WITH VECTORIZATION + // Short -> BigInt IMPLICIT WITH VECTORIZATION + // Int --> BigInt IMPLICIT WITH VECTORIZATION + // + // CONSIDER: + // Float -> Double IMPLICIT WITH VECTORIZATION + // (Char | VarChar) -> String IMPLICIT WITH VECTORIZATION + // + + private Reader createConversionReader(TypeInfo sourceTypeInfo, + TypeInfo targetTypeInfo, int outputColumn) throws HiveException { + + PrimitiveObjectInspector sourcePrimitiveObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) sourceTypeInfo); + + PrimitiveTypeInfo targetPrimitiveTypeInfo = (PrimitiveTypeInfo) targetTypeInfo; + PrimitiveCategory targetPrimitiveCategory = + targetPrimitiveTypeInfo.getPrimitiveCategory(); + + Reader reader = null; + switch (targetPrimitiveCategory) { +// case VOID: +// reader = new ConvertToVoidReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, +// targetTypeInfo, outputColumn); +// break; + case BOOLEAN: + reader = new ConvertToBooleanReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case BYTE: + reader = new ConvertToByteReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case SHORT: + reader = new ConvertToShortReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case INT: + reader = new ConvertToIntReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case LONG: + reader = new ConvertToLongReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case TIMESTAMP: + reader = new ConvertToTimestampReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case DATE: + reader = new ConvertToDateReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case FLOAT: + reader = new ConvertToFloatReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case DOUBLE: + reader = new ConvertToDoubleReader(deserializeRead, sourceTypeInfo, 
sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case BINARY: + reader = new ConvertToBinaryReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case STRING: + reader = new ConvertToStringReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case VARCHAR: + reader = new ConvertToVarcharReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case CHAR: + reader = new ConvertToCharReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case DECIMAL: + reader = new ConvertToHiveDecimalReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case INTERVAL_YEAR_MONTH: + reader = new ConvertToIntervalYearMonthReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + case INTERVAL_DAY_TIME: + reader = new ConvertToIntervalDayTimeReader(deserializeRead, sourceTypeInfo, sourcePrimitiveObjectInspector, + targetTypeInfo, outputColumn); + break; + default: + throw new HiveException("No vector row reader for target primitive category " + + targetPrimitiveCategory); + } + + return reader; + } + + //------------------------------------------------------------------------------------------------ + + private void addReader(TypeInfo typeInfo, int index, int outputColumn) throws HiveException { + Reader readerByValue = null; + Reader readerByReference = null; + + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); switch (primitiveCategory) { // case VOID: // UNDONE: // break; case BOOLEAN: - readerByValue = new BooleanReader(outputColumn); + readerByValue = new BooleanReader(typeInfo, outputColumn); break; case BYTE: - readerByValue = new ByteReader(outputColumn); + readerByValue = new ByteReader(typeInfo, outputColumn); break; case SHORT: - readerByValue = new ShortReader(outputColumn); + readerByValue = new ShortReader(typeInfo, outputColumn); break; case INT: - readerByValue = new IntReader(outputColumn); + readerByValue = new IntReader(typeInfo, outputColumn); break; case LONG: - readerByValue = new LongReader(outputColumn); + readerByValue = new LongReader(typeInfo, outputColumn); break; case DATE: - readerByValue = new DateReader(outputColumn); + readerByValue = new DateReader(typeInfo, outputColumn); break; case TIMESTAMP: - readerByValue = new TimestampReader(outputColumn); + readerByValue = new TimestampReader(typeInfo, outputColumn); break; case FLOAT: - readerByValue = new FloatReader(outputColumn); + readerByValue = new FloatReader(typeInfo, outputColumn); break; case DOUBLE: - readerByValue = new DoubleReader(outputColumn); + readerByValue = new DoubleReader(typeInfo, outputColumn); break; case STRING: - readerByValue = new StringReaderByValue(outputColumn); - readerByReference = new StringReaderByReference(outputColumn); + readerByValue = new StringReaderByValue(typeInfo, outputColumn); + readerByReference = new StringReaderByReference(typeInfo, outputColumn); break; case CHAR: - { - CharTypeInfo charTypeInfo = (CharTypeInfo) primitiveTypeInfo; - readerByValue = new CharReaderByValue(charTypeInfo, outputColumn); - readerByReference = new CharReaderByReference(charTypeInfo, outputColumn); - } + readerByValue = new CharReaderByValue(typeInfo, outputColumn); + 
readerByReference = new CharReaderByReference(typeInfo, outputColumn); break; case VARCHAR: - { - VarcharTypeInfo varcharTypeInfo = (VarcharTypeInfo) primitiveTypeInfo; - readerByValue = new VarcharReaderByValue(varcharTypeInfo, outputColumn); - readerByReference = new VarcharReaderByReference(varcharTypeInfo, outputColumn); - } + readerByValue = new VarcharReaderByValue(typeInfo, outputColumn); + readerByReference = new VarcharReaderByReference(typeInfo, outputColumn); break; case BINARY: - readerByValue = new BinaryReaderByValue(outputColumn); - readerByReference = new BinaryReaderByReference(outputColumn); + readerByValue = new BinaryReaderByValue(typeInfo, outputColumn); + readerByReference = new BinaryReaderByReference(typeInfo, outputColumn); break; case DECIMAL: - readerByValue = new HiveDecimalReader(outputColumn); + readerByValue = new HiveDecimalReader(typeInfo, outputColumn); break; case INTERVAL_YEAR_MONTH: - readerByValue = new IntervalYearMonthReader(outputColumn); + readerByValue = new IntervalYearMonthReader(typeInfo, outputColumn); break; case INTERVAL_DAY_TIME: - readerByValue = new IntervalDayTimeReader(outputColumn); + readerByValue = new IntervalDayTimeReader(typeInfo, outputColumn); break; default: throw new HiveException("Unexpected primitive type category " + primitiveCategory); @@ -676,7 +1327,7 @@ public void init(int[] outputColumns) throws HiveException { for (int i = 0; i < typeInfos.length; i++) { int outputColumn = outputColumns[i]; - addReader(i, outputColumn); + addReader(typeInfos[i], i, outputColumn); } } @@ -687,7 +1338,7 @@ public void init(List outputColumns) throws HiveException { for (int i = 0; i < typeInfos.length; i++) { int outputColumn = outputColumns.get(i); - addReader(i, outputColumn); + addReader(typeInfos[i], i, outputColumn); } } @@ -698,7 +1349,86 @@ public void init(int startColumn) throws HiveException { for (int i = 0; i < typeInfos.length; i++) { int outputColumn = startColumn + i; - addReader(i, outputColumn); + addReader(typeInfos[i], i, outputColumn); + } + } + + public void init(boolean[] columnsToIncludeTruncated) throws HiveException { + + if (columnsToIncludeTruncated != null) { + deserializeRead.setColumnsToInclude(columnsToIncludeTruncated); + } + + final int columnCount = (columnsToIncludeTruncated == null ? + typeInfos.length : columnsToIncludeTruncated.length); + + readersByValue = new Reader[columnCount]; + readersByReference = new Reader[columnCount]; + + for (int i = 0; i < columnCount; i++) { + + if (columnsToIncludeTruncated != null && !columnsToIncludeTruncated[i]) { + + // Field not included in query. + + Reader notIncludedColumnReader = new NotIncludedColumnReader(null, i); + readersByValue[i] = notIncludedColumnReader; + readersByReference[i] = notIncludedColumnReader; + + } else { + + addReader(typeInfos[i], i, i); + + } + } + } + + public void init(TypeInfo[] sourceTypeInfos, TypeInfo[] targetTypeInfos, + boolean[] conversionFlags, boolean[] columnsToIncludeTruncated) throws HiveException { + + if (columnsToIncludeTruncated != null) { + deserializeRead.setColumnsToInclude(columnsToIncludeTruncated); + } + + final int columnCount = (columnsToIncludeTruncated == null ? + typeInfos.length : columnsToIncludeTruncated.length); + + readersByValue = new Reader[columnCount]; + readersByReference = new Reader[columnCount]; + + for (int i = 0; i < columnCount; i++) { + + if (columnsToIncludeTruncated != null && !columnsToIncludeTruncated[i]) { + + // Field not included in query. 
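+        // (A NotIncludedColumnReader expects only NULLs for the excluded field; a non-NULL
+        // value for a not-included column is treated as an error.)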
+ + Reader notIncludedColumnReader = new NotIncludedColumnReader(null, i); + readersByValue[i] = notIncludedColumnReader; + readersByReference[i] = notIncludedColumnReader; + + } else { + + TypeInfo sourceTypeInfo = sourceTypeInfos[i]; + TypeInfo targetTypeInfo = targetTypeInfos[i]; + + if (conversionFlags != null && conversionFlags[i]) { + + if (VectorPartitionConversion.isImplicitVectorColumnConversion(sourceTypeInfo, targetTypeInfo)) { + + // Do implicit conversion using the source type. + addReader(sourceTypeInfo, i, i); + } else { + + Reader reader = createConversionReader(sourceTypeInfo, targetTypeInfo, i); + readersByValue[i] = reader; + readersByReference[i] = reader; + } + } else { + + addReader(typeInfos[i], i, i); + + } + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java index e883f38..49e913a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -18,49 +18,30 @@ package org.apache.hadoop.hive.ql.exec.vector; -import java.io.IOException; -import java.sql.Date; -import java.sql.Timestamp; import java.util.List; -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractor.*; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.DataOutputBuffer; -import org.apache.hadoop.io.Text; -import org.apache.hive.common.util.DateUtils; /** * This class extracts specified VectorizedRowBatch row columns into a Writable row Object[]. * * The caller provides the hive type names and target column numbers in the order desired to * extract from the Writable row Object[]. - * - * This class is abstract to allow the subclasses to control batch reuse. 
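+ *
+ * A minimal usage sketch (illustrative only; assumes an already-initialized instance and an
+ * in-scope batch and batchIndex):
+ *
+ *   Object[] row = new Object[vectorExtractRow.getCount()];
+ *   vectorExtractRow.extractRow(batch, batchIndex, row);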
*/ -public abstract class VectorExtractRow { +public class VectorExtractRow { private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(VectorExtractRow.class); @@ -71,576 +52,6 @@ public VectorExtractRow() { tolerateNullColumns = true; } - protected abstract class Extractor { - protected int columnIndex; - protected Object object; - - public Extractor(int columnIndex) { - this.columnIndex = columnIndex; - } - - public int getColumnIndex() { - return columnIndex; - } - - abstract void setColumnVector(VectorizedRowBatch batch); - - abstract void forgetColumnVector(); - - abstract Object extract(int batchIndex); - } - - private class VoidExtractor extends Extractor { - - VoidExtractor(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - } - - @Override - void forgetColumnVector() { - } - - @Override - Object extract(int batchIndex) { - return null; - } - } - - private abstract class AbstractLongExtractor extends Extractor { - - protected LongColumnVector colVector; - protected long[] vector; - - AbstractLongExtractor(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (LongColumnVector) batch.cols[columnIndex]; - vector = colVector.vector; - } - - @Override - void forgetColumnVector() { - colVector = null; - vector = null; - } - } - - protected class BooleanExtractor extends AbstractLongExtractor { - - BooleanExtractor(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector.create(false); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - long value = vector[adjustedIndex]; - PrimitiveObjectInspectorFactory.writableBooleanObjectInspector.set(object, value == 0 ? false : true); - return object; - } else { - return null; - } - } - } - - protected class ByteExtractor extends AbstractLongExtractor { - - ByteExtractor(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableByteObjectInspector.create((byte) 0); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - long value = vector[adjustedIndex]; - PrimitiveObjectInspectorFactory.writableByteObjectInspector.set(object, (byte) value); - return object; - } else { - return null; - } - } - } - - private class ShortExtractor extends AbstractLongExtractor { - - ShortExtractor(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableShortObjectInspector.create((short) 0); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - long value = vector[adjustedIndex]; - PrimitiveObjectInspectorFactory.writableShortObjectInspector.set(object, (short) value); - return object; - } else { - return null; - } - } - } - - private class IntExtractor extends AbstractLongExtractor { - - IntExtractor(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableIntObjectInspector.create(0); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 
0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - long value = vector[adjustedIndex]; - PrimitiveObjectInspectorFactory.writableIntObjectInspector.set(object, (int) value); - return object; - } else { - return null; - } - } - } - - private class LongExtractor extends AbstractLongExtractor { - - LongExtractor(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableLongObjectInspector.create(0); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - long value = vector[adjustedIndex]; - PrimitiveObjectInspectorFactory.writableLongObjectInspector.set(object, value); - return object; - } else { - return null; - } - } - } - - private class DateExtractor extends AbstractLongExtractor { - - private Date date; - - DateExtractor(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableDateObjectInspector.create(new Date(0)); - date = new Date(0); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - long value = vector[adjustedIndex]; - date.setTime(DateWritable.daysToMillis((int) value)); - PrimitiveObjectInspectorFactory.writableDateObjectInspector.set(object, date); - return object; - } else { - return null; - } - } - } - - private abstract class AbstractTimestampExtractor extends Extractor { - - protected TimestampColumnVector colVector; - - AbstractTimestampExtractor(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (TimestampColumnVector) batch.cols[columnIndex]; - } - - @Override - void forgetColumnVector() { - colVector = null; - } - } - - private class TimestampExtractor extends AbstractTimestampExtractor { - - protected Timestamp timestamp; - - TimestampExtractor(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.create(new Timestamp(0)); - timestamp = new Timestamp(0); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - colVector.timestampUpdate(timestamp, adjustedIndex); - PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.set(object, timestamp); - return object; - } else { - return null; - } - } - } - - private class IntervalYearMonthExtractor extends AbstractLongExtractor { - - private HiveIntervalYearMonth hiveIntervalYearMonth; - - IntervalYearMonthExtractor(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.create(new HiveIntervalYearMonth(0)); - hiveIntervalYearMonth = new HiveIntervalYearMonth(0); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 
0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - int totalMonths = (int) vector[adjustedIndex]; - hiveIntervalYearMonth.set(totalMonths); - PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.set(object, hiveIntervalYearMonth); - return object; - } else { - return null; - } - } - } - - private abstract class AbstractIntervalDayTimeExtractor extends Extractor { - - protected IntervalDayTimeColumnVector colVector; - - AbstractIntervalDayTimeExtractor(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (IntervalDayTimeColumnVector) batch.cols[columnIndex]; - } - - @Override - void forgetColumnVector() { - colVector = null; - } - } - - private class IntervalDayTimeExtractor extends AbstractIntervalDayTimeExtractor { - - private HiveIntervalDayTime hiveIntervalDayTime; - - IntervalDayTimeExtractor(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.create(new HiveIntervalDayTime(0, 0)); - hiveIntervalDayTime = new HiveIntervalDayTime(0, 0); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - hiveIntervalDayTime.set(colVector.asScratchIntervalDayTime(adjustedIndex)); - PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.set(object, hiveIntervalDayTime); - return object; - } else { - return null; - } - } - } - - private abstract class AbstractDoubleExtractor extends Extractor { - - protected DoubleColumnVector colVector; - protected double[] vector; - - AbstractDoubleExtractor(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (DoubleColumnVector) batch.cols[columnIndex]; - vector = colVector.vector; - } - - @Override - void forgetColumnVector() { - colVector = null; - vector = null; - } - } - - private class FloatExtractor extends AbstractDoubleExtractor { - - FloatExtractor(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableFloatObjectInspector.create(0f); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - double value = vector[adjustedIndex]; - PrimitiveObjectInspectorFactory.writableFloatObjectInspector.set(object, (float) value); - return object; - } else { - return null; - } - } - } - - private class DoubleExtractor extends AbstractDoubleExtractor { - - DoubleExtractor(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.create(0f); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 
0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - double value = vector[adjustedIndex]; - PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.set(object, value); - return object; - } else { - return null; - } - } - } - - private abstract class AbstractBytesExtractor extends Extractor { - - protected BytesColumnVector colVector; - - AbstractBytesExtractor(int columnIndex) { - super(columnIndex); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (BytesColumnVector) batch.cols[columnIndex]; - } - - @Override - void forgetColumnVector() { - colVector = null; - } - } - - private class BinaryExtractorByValue extends AbstractBytesExtractor { - - private DataOutputBuffer buffer; - - // Use the BytesWritable instance here as a reference to data saved in buffer. We do not - // want to pass the binary object inspector a byte[] since we would need to allocate it on the - // heap each time to get the length correct. - private BytesWritable bytesWritable; - - BinaryExtractorByValue(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY); - buffer = new DataOutputBuffer(); - bytesWritable = new BytesWritable(); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - byte[] bytes = colVector.vector[adjustedIndex]; - int start = colVector.start[adjustedIndex]; - int length = colVector.length[adjustedIndex]; - - // Save a copy of the binary data. - buffer.reset(); - try { - buffer.write(bytes, start, length); - } catch (IOException ioe) { - throw new IllegalStateException("bad write", ioe); - } - - bytesWritable.set(buffer.getData(), 0, buffer.getLength()); - PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.set(object, bytesWritable); - return object; - } else { - return null; - } - } - } - - private class StringExtractorByValue extends AbstractBytesExtractor { - - // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. - private Text text; - - StringExtractorByValue(int columnIndex) { - super(columnIndex); - object = PrimitiveObjectInspectorFactory.writableStringObjectInspector.create(StringUtils.EMPTY); - text = new Text(); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - byte[] value = colVector.vector[adjustedIndex]; - int start = colVector.start[adjustedIndex]; - int length = colVector.length[adjustedIndex]; - - if (value == null) { - LOG.info("null string entry: batchIndex " + batchIndex + " columnIndex " + columnIndex); - } - // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. - text.set(value, start, length); - - PrimitiveObjectInspectorFactory.writableStringObjectInspector.set(object, text); - return object; - } else { - return null; - } - } - } - - private class VarCharExtractorByValue extends AbstractBytesExtractor { - - // We need our own instance of the VARCHAR object inspector to hold the maximum length - // from the TypeInfo. - private WritableHiveVarcharObjectInspector writableVarcharObjectInspector; - - // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. - private Text text; - - /* - * @param varcharTypeInfo - * We need the VARCHAR type information that contains the maximum length. 
- * @param columnIndex - * The vector row batch column that contains the bytes for the VARCHAR. - */ - VarCharExtractorByValue(VarcharTypeInfo varcharTypeInfo, int columnIndex) { - super(columnIndex); - writableVarcharObjectInspector = new WritableHiveVarcharObjectInspector(varcharTypeInfo); - object = writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); - text = new Text(); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - byte[] value = colVector.vector[adjustedIndex]; - int start = colVector.start[adjustedIndex]; - int length = colVector.length[adjustedIndex]; - - // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. - text.set(value, start, length); - - writableVarcharObjectInspector.set(object, text.toString()); - return object; - } else { - return null; - } - } - } - - private class CharExtractorByValue extends AbstractBytesExtractor { - - // We need our own instance of the CHAR object inspector to hold the maximum length - // from the TypeInfo. - private WritableHiveCharObjectInspector writableCharObjectInspector; - - // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. - private Text text; - - /* - * @param varcharTypeInfo - * We need the CHAR type information that contains the maximum length. - * @param columnIndex - * The vector row batch column that contains the bytes for the CHAR. - */ - CharExtractorByValue(CharTypeInfo charTypeInfo, int columnIndex) { - super(columnIndex); - writableCharObjectInspector = new WritableHiveCharObjectInspector(charTypeInfo); - object = writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); - text = new Text(); - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - byte[] value = colVector.vector[adjustedIndex]; - int start = colVector.start[adjustedIndex]; - int length = colVector.length[adjustedIndex]; - - // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. - text.set(value, start, length); - - writableCharObjectInspector.set(object, text.toString()); - return object; - } else { - return null; - } - } - } - - private class DecimalExtractor extends Extractor { - - private WritableHiveDecimalObjectInspector writableDecimalObjectInspector; - protected DecimalColumnVector colVector; - - /* - * @param decimalTypeInfo - * We need the DECIMAL type information that contains scale and precision. - * @param columnIndex - * The vector row batch column that contains the bytes for the VARCHAR. - */ - DecimalExtractor(DecimalTypeInfo decimalTypeInfo, int columnIndex) { - super(columnIndex); - writableDecimalObjectInspector = new WritableHiveDecimalObjectInspector(decimalTypeInfo); - object = writableDecimalObjectInspector.create(HiveDecimal.ZERO); - } - - @Override - void setColumnVector(VectorizedRowBatch batch) { - colVector = (DecimalColumnVector) batch.cols[columnIndex]; - } - - @Override - void forgetColumnVector() { - colVector = null; - } - - @Override - Object extract(int batchIndex) { - int adjustedIndex = (colVector.isRepeating ? 
0 : batchIndex); - if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - HiveDecimal value = colVector.vector[adjustedIndex].getHiveDecimal(); - writableDecimalObjectInspector.set(object, value); - return object; - } else { - return null; - } - } - } - private Extractor createExtractor(PrimitiveTypeInfo primitiveTypeInfo, int columnIndex) throws HiveException { PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); Extractor extracter; @@ -676,16 +87,16 @@ private Extractor createExtractor(PrimitiveTypeInfo primitiveTypeInfo, int colum extracter = new DoubleExtractor(columnIndex); break; case BINARY: - extracter = new BinaryExtractorByValue(columnIndex); + extracter = new BinaryExtractor(columnIndex); break; case STRING: - extracter = new StringExtractorByValue(columnIndex); + extracter = new StringExtractor(columnIndex); break; case VARCHAR: - extracter = new VarCharExtractorByValue((VarcharTypeInfo) primitiveTypeInfo, columnIndex); + extracter = new VarcharExtractor((VarcharTypeInfo) primitiveTypeInfo, columnIndex); break; case CHAR: - extracter = new CharExtractorByValue((CharTypeInfo) primitiveTypeInfo, columnIndex); + extracter = new CharExtractor((CharTypeInfo) primitiveTypeInfo, columnIndex); break; case DECIMAL: extracter = new DecimalExtractor((DecimalTypeInfo) primitiveTypeInfo, columnIndex); @@ -738,8 +149,22 @@ public int getCount() { return extracters.length; } - protected void setBatch(VectorizedRowBatch batch) throws HiveException { + public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) { + Extractor extracter = extracters[logicalColumnIndex]; + int columnIndex = extracter.getColumnIndex(); + if (batch.cols[columnIndex] == null) { + if (tolerateNullColumns) { + // Replace with void... 
+ extracter = new VoidExtractor(columnIndex); + extracters[logicalColumnIndex] = extracter; + } else { + throw new RuntimeException("Unexpected null vector column " + columnIndex); + } + } + return extracter.extract(batch, batchIndex); + } + public void extractRow(VectorizedRowBatch batch, int batchIndex, Object[] objects) { for (int i = 0; i < extracters.length; i++) { Extractor extracter = extracters[i]; int columnIndex = extracter.getColumnIndex(); @@ -749,27 +174,10 @@ protected void setBatch(VectorizedRowBatch batch) throws HiveException { extracter = new VoidExtractor(columnIndex); extracters[i] = extracter; } else { - throw new HiveException("Unexpected null vector column " + columnIndex); + throw new RuntimeException("Unexpected null vector column " + columnIndex); } } - extracter.setColumnVector(batch); - } - } - - protected void forgetBatch() { - for (Extractor extracter : extracters) { - extracter.forgetColumnVector(); - } - } - - public Object extractRowColumn(int batchIndex, int logicalColumnIndex) { - return extracters[logicalColumnIndex].extract(batchIndex); - } - - public void extractRow(int batchIndex, Object[] objects) { - for (int i = 0; i < extracters.length; i++) { - Extractor extracter = extracters[i]; - objects[i] = extracter.extract(batchIndex); + objects[i] = extracter.extract(batch, batchIndex); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowDynBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowDynBatch.java deleted file mode 100644 index 0ff7145..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowDynBatch.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import org.apache.hadoop.hive.ql.metadata.HiveException; - -/** - * This class extracts specified VectorizedRowBatch row columns into a Writable row Object[]. - * - * The caller provides the hive type names and target column numbers in the order desired to - * extract from the Writable row Object[]. - * - * This class is for use when the batch being assigned is always the same. 
- */ -public class VectorExtractRowDynBatch extends VectorExtractRow { - - public void setBatchOnEntry(VectorizedRowBatch batch) throws HiveException { - setBatch(batch); - } - - public void forgetBatchOnExit() { - forgetBatch(); - } -} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowSameBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowSameBatch.java deleted file mode 100644 index faec0aa..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowSameBatch.java +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import org.apache.hadoop.hive.ql.metadata.HiveException; - -/** - * This class extracts specified VectorizedRowBatch row columns into a Writable row Object[]. - * - * The caller provides the hive type names and target column numbers in the order desired to - * extract from the Writable row Object[]. - * - * This class is for use when the batch being assigned is always the same. - */ -public class VectorExtractRowSameBatch extends VectorExtractRow { - - public void setOneBatch(VectorizedRowBatch batch) throws HiveException { - setBatch(batch); - } -} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractor.java new file mode 100644 index 0000000..1a73b42 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractor.java @@ -0,0 +1,639 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.List; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateUtils; + +/** + * This class extracts specified VectorizedRowBatch row columns into a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * extract from the Writable row Object[]. + * + * This class is abstract to allow the subclasses to control batch reuse. 
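+ *
+ * A minimal sketch of using one Extractor directly (illustrative only; columnIndex, batch and
+ * batchIndex are assumed to be in scope):
+ *
+ *   Extractor extracter = new VectorExtractor.LongExtractor(columnIndex);
+ *   Object writable = extracter.extract(batch, batchIndex);  // null when the column value is NULL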
+ */ +public abstract class VectorExtractor { + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(VectorExtractor.class); + + public abstract static class Extractor { + protected int columnIndex; + protected Object object; + + public Extractor(int columnIndex) { + this.columnIndex = columnIndex; + } + + public int getColumnIndex() { + return columnIndex; + } + + abstract ObjectInspector getObjectInspector(); + + abstract Object extract(VectorizedRowBatch batch, int batchIndex); + } + + public static class VoidExtractor extends Extractor { + + public VoidExtractor(int columnIndex) { + super(columnIndex); + } + + @Override + public ObjectInspector getObjectInspector() { + return null; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + throw new RuntimeException("Expected to be overriden"); + } + } + + public static class BooleanExtractor extends Extractor { + + public BooleanExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector.create(false); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + LongColumnVector longColVector = (LongColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (longColVector.isRepeating ? 0 : batchIndex); + if (longColVector.noNulls || !longColVector.isNull[adjustedIndex]) { + long value = longColVector.vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableBooleanObjectInspector.set(object, value == 0 ? false : true); + return object; + } else { + return null; + } + } + } + + public static class ByteExtractor extends Extractor { + + public ByteExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableByteObjectInspector.create((byte) 0); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableByteObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + LongColumnVector longColVector = (LongColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (longColVector.isRepeating ? 0 : batchIndex); + if (longColVector.noNulls || !longColVector.isNull[adjustedIndex]) { + long value = longColVector.vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableByteObjectInspector.set(object, (byte) value); + return object; + } else { + return null; + } + } + } + + public static class ShortExtractor extends Extractor { + + public ShortExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableShortObjectInspector.create((short) 0); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableShortObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + LongColumnVector longColVector = (LongColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (longColVector.isRepeating ? 
0 : batchIndex); + if (longColVector.noNulls || !longColVector.isNull[adjustedIndex]) { + long value = longColVector.vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableShortObjectInspector.set(object, (short) value); + return object; + } else { + return null; + } + } + } + + public static class IntExtractor extends Extractor { + + public IntExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableIntObjectInspector.create(0); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableIntObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + LongColumnVector longColVector = (LongColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (longColVector.isRepeating ? 0 : batchIndex); + if (longColVector.noNulls || !longColVector.isNull[adjustedIndex]) { + long value = longColVector.vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableIntObjectInspector.set(object, (int) value); + return object; + } else { + return null; + } + } + } + + public static class LongExtractor extends Extractor { + + public LongExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableLongObjectInspector.create(0); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableLongObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + LongColumnVector longColVector = (LongColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (longColVector.isRepeating ? 0 : batchIndex); + if (longColVector.noNulls || !longColVector.isNull[adjustedIndex]) { + long value = longColVector.vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableLongObjectInspector.set(object, value); + return object; + } else { + return null; + } + } + } + + public static class DateExtractor extends Extractor { + + private Date date; + + public DateExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableDateObjectInspector.create(new Date(0)); + date = new Date(0); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableDateObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + LongColumnVector longColVector = (LongColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (longColVector.isRepeating ? 0 : batchIndex); + if (longColVector.noNulls || !longColVector.isNull[adjustedIndex]) { + long value = longColVector.vector[adjustedIndex]; + date.setTime(DateWritable.daysToMillis((int) value)); + PrimitiveObjectInspectorFactory.writableDateObjectInspector.set(object, date); + return object; + } else { + return null; + } + } + } + + public static class TimestampExtractor extends Extractor { + + protected Timestamp timestamp; + + public TimestampExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.create(new Timestamp(0)); + timestamp = new Timestamp(0); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + TimestampColumnVector timestampColVector = (TimestampColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (timestampColVector.isRepeating ? 
0 : batchIndex); + if (timestampColVector.noNulls || !timestampColVector.isNull[adjustedIndex]) { + timestampColVector.timestampUpdate(timestamp, adjustedIndex); + PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.set(object, timestamp); + return object; + } else { + return null; + } + } + } + + public static class IntervalYearMonthExtractor extends Extractor { + + private HiveIntervalYearMonth hiveIntervalYearMonth; + + public IntervalYearMonthExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.create(new HiveIntervalYearMonth(0)); + hiveIntervalYearMonth = new HiveIntervalYearMonth(0); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + LongColumnVector longColVector = (LongColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (longColVector.isRepeating ? 0 : batchIndex); + if (longColVector.noNulls || !longColVector.isNull[adjustedIndex]) { + int totalMonths = (int) longColVector.vector[adjustedIndex]; + hiveIntervalYearMonth.set(totalMonths); + PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.set(object, hiveIntervalYearMonth); + return object; + } else { + return null; + } + } + } + + public static class IntervalDayTimeExtractor extends Extractor { + + private HiveIntervalDayTime hiveIntervalDayTime; + + public IntervalDayTimeExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.create(new HiveIntervalDayTime(0, 0)); + hiveIntervalDayTime = new HiveIntervalDayTime(0, 0); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + IntervalDayTimeColumnVector timestampColVector = (IntervalDayTimeColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (timestampColVector.isRepeating ? 0 : batchIndex); + if (timestampColVector.noNulls || !timestampColVector.isNull[adjustedIndex]) { + hiveIntervalDayTime.set(timestampColVector.asScratchIntervalDayTime(adjustedIndex)); + PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.set(object, hiveIntervalDayTime); + return object; + } else { + return null; + } + } + } + + public static class FloatExtractor extends Extractor { + + public FloatExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableFloatObjectInspector.create(0f); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableFloatObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + DoubleColumnVector doubleColVector = (DoubleColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (doubleColVector.isRepeating ? 
0 : batchIndex); + if (doubleColVector.noNulls || !doubleColVector.isNull[adjustedIndex]) { + double value = doubleColVector.vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableFloatObjectInspector.set(object, (float) value); + return object; + } else { + return null; + } + } + } + + public static class DoubleExtractor extends Extractor { + + public DoubleExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.create(0f); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + DoubleColumnVector doubleColVector = (DoubleColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (doubleColVector.isRepeating ? 0 : batchIndex); + if (doubleColVector.noNulls || !doubleColVector.isNull[adjustedIndex]) { + double value = doubleColVector.vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.set(object, value); + return object; + } else { + return null; + } + } + } + + public static class BinaryExtractor extends Extractor { + + private DataOutputBuffer buffer; + + // Use the BytesWritable instance here as a reference to data saved in buffer. We do not + // want to pass the binary object inspector a byte[] since we would need to allocate it on the + // heap each time to get the length correct. + private BytesWritable bytesWritable; + + public BinaryExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY); + buffer = new DataOutputBuffer(); + bytesWritable = new BytesWritable(); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + BytesColumnVector bytesColVector = (BytesColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (bytesColVector.isRepeating ? 0 : batchIndex); + if (bytesColVector.noNulls || !bytesColVector.isNull[adjustedIndex]) { + byte[] bytes = bytesColVector.vector[adjustedIndex]; + int start = bytesColVector.start[adjustedIndex]; + int length = bytesColVector.length[adjustedIndex]; + + // Save a copy of the binary data. + buffer.reset(); + try { + buffer.write(bytes, start, length); + } catch (IOException ioe) { + throw new IllegalStateException("bad write", ioe); + } + + bytesWritable.set(buffer.getData(), 0, buffer.getLength()); + PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.set(object, bytesWritable); + return object; + } else { + return null; + } + } + } + + public static class StringExtractor extends Extractor { + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + private Text text; + + public StringExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableStringObjectInspector.create(StringUtils.EMPTY); + text = new Text(); + } + + @Override + public ObjectInspector getObjectInspector() { + return PrimitiveObjectInspectorFactory.writableStringObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + BytesColumnVector bytesColVector = (BytesColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (bytesColVector.isRepeating ? 
0 : batchIndex); + if (bytesColVector.noNulls || !bytesColVector.isNull[adjustedIndex]) { + byte[] value = bytesColVector.vector[adjustedIndex]; + int start = bytesColVector.start[adjustedIndex]; + int length = bytesColVector.length[adjustedIndex]; + + if (value == null) { + LOG.info("null string entry: batchIndex " + batchIndex + " columnIndex " + columnIndex); + } + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + text.set(value, start, length); + + PrimitiveObjectInspectorFactory.writableStringObjectInspector.set(object, text); + return object; + } else { + return null; + } + } + } + + public static class VarcharExtractor extends Extractor { + + // We need our own instance of the VARCHAR object inspector to hold the maximum length + // from the TypeInfo. + private WritableHiveVarcharObjectInspector writableVarcharObjectInspector; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + private Text text; + + /* + * @param varcharTypeInfo + * We need the VARCHAR type information that contains the maximum length. + * @param columnIndex + * The vector row batch column that contains the bytes for the VARCHAR. + */ + public VarcharExtractor(VarcharTypeInfo varcharTypeInfo, int columnIndex) { + super(columnIndex); + writableVarcharObjectInspector = new WritableHiveVarcharObjectInspector(varcharTypeInfo); + object = writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); + text = new Text(); + } + + @Override + public ObjectInspector getObjectInspector() { + return writableVarcharObjectInspector; + } + + public VarcharExtractor(int columnIndex, int maxLength) { + super(columnIndex); + writableVarcharObjectInspector = + new WritableHiveVarcharObjectInspector(new VarcharTypeInfo(maxLength)); + object = writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); + text = new Text(); + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + BytesColumnVector bytesColVector = (BytesColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (bytesColVector.isRepeating ? 0 : batchIndex); + if (bytesColVector.noNulls || !bytesColVector.isNull[adjustedIndex]) { + byte[] value = bytesColVector.vector[adjustedIndex]; + int start = bytesColVector.start[adjustedIndex]; + int length = bytesColVector.length[adjustedIndex]; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + text.set(value, start, length); + + writableVarcharObjectInspector.set(object, text.toString()); + return object; + } else { + return null; + } + } + } + + public static class CharExtractor extends Extractor { + + // We need our own instance of the CHAR object inspector to hold the maximum length + // from the TypeInfo. + private WritableHiveCharObjectInspector writableCharObjectInspector; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + private Text text; + + /* + * @param varcharTypeInfo + * We need the CHAR type information that contains the maximum length. + * @param columnIndex + * The vector row batch column that contains the bytes for the CHAR. 
+ */ + public CharExtractor(CharTypeInfo charTypeInfo, int columnIndex) { + super(columnIndex); + writableCharObjectInspector = new WritableHiveCharObjectInspector(charTypeInfo); + object = writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); + text = new Text(); + } + + @Override + public ObjectInspector getObjectInspector() { + return writableCharObjectInspector; + } + + public CharExtractor(int columnIndex, int maxLength) { + super(columnIndex); + writableCharObjectInspector = + new WritableHiveCharObjectInspector(new CharTypeInfo(maxLength)); + object = writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); + text = new Text(); + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + BytesColumnVector bytesColVector = (BytesColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (bytesColVector.isRepeating ? 0 : batchIndex); + if (bytesColVector.noNulls || !bytesColVector.isNull[adjustedIndex]) { + byte[] value = bytesColVector.vector[adjustedIndex]; + int start = bytesColVector.start[adjustedIndex]; + int length = bytesColVector.length[adjustedIndex]; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + text.set(value, start, length); + + writableCharObjectInspector.set(object, text.toString()); + return object; + } else { + return null; + } + } + } + + public static class DecimalExtractor extends Extractor { + + private WritableHiveDecimalObjectInspector writableDecimalObjectInspector; + + /* + * @param decimalTypeInfo + * We need the DECIMAL type information that contains scale and precision. + * @param columnIndex + * The vector row batch column that contains the bytes for the VARCHAR. + */ + public DecimalExtractor(DecimalTypeInfo decimalTypeInfo, int columnIndex) { + super(columnIndex); + writableDecimalObjectInspector = new WritableHiveDecimalObjectInspector(decimalTypeInfo); + object = writableDecimalObjectInspector.create(HiveDecimal.ZERO); + } + + public DecimalExtractor(int columnIndex, int precision, int scale) { + super(columnIndex); + writableDecimalObjectInspector = + new WritableHiveDecimalObjectInspector( + new DecimalTypeInfo(precision, scale)); + object = writableDecimalObjectInspector.create(HiveDecimal.ZERO); + } + + @Override + public ObjectInspector getObjectInspector() { + return writableDecimalObjectInspector; + } + + @Override + Object extract(VectorizedRowBatch batch, int batchIndex) { + DecimalColumnVector decimalColVector = (DecimalColumnVector) batch.cols[columnIndex]; + int adjustedIndex = (decimalColVector.isRepeating ? 
0 : batchIndex); + if (decimalColVector.noNulls || !decimalColVector.isNull[adjustedIndex]) { + HiveDecimal value = decimalColVector.vector[adjustedIndex].getHiveDecimal(); + writableDecimalObjectInspector.set(object, value); + return object; + } else { + return null; + } + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java index f09534c..8d8d7a4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java @@ -44,7 +44,7 @@ private transient boolean firstBatch; - private transient VectorExtractRowDynBatch vectorExtractRowDynBatch; + private transient VectorExtractRow vectorExtractRow; protected transient Object[] singleRow; @@ -80,30 +80,26 @@ protected void initializeOp(Configuration hconf) throws HiveException { public void process(Object data, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) data; if (firstBatch) { - vectorExtractRowDynBatch = new VectorExtractRowDynBatch(); - vectorExtractRowDynBatch.init((StructObjectInspector) inputObjInspectors[0], vContext.getProjectedColumns()); + vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init((StructObjectInspector) inputObjInspectors[0], vContext.getProjectedColumns()); - singleRow = new Object[vectorExtractRowDynBatch.getCount()]; + singleRow = new Object[vectorExtractRow.getCount()]; firstBatch = false; } - vectorExtractRowDynBatch.setBatchOnEntry(batch); - if (batch.selectedInUse) { int selected[] = batch.selected; for (int logical = 0 ; logical < batch.size; logical++) { int batchIndex = selected[logical]; - vectorExtractRowDynBatch.extractRow(batchIndex, singleRow); + vectorExtractRow.extractRow(batch, batchIndex, singleRow); super.process(singleRow, tag); } } else { for (int batchIndex = 0 ; batchIndex < batch.size; batchIndex++) { - vectorExtractRowDynBatch.extractRow(batchIndex, singleRow); + vectorExtractRow.extractRow(batch, batchIndex, singleRow); super.process(singleRow, tag); } } - - vectorExtractRowDynBatch.forgetBatchOnExit(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 31f5c72..eac430c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -103,7 +103,7 @@ private transient VectorizedRowBatch outputBatch; private transient VectorizedRowBatchCtx vrbCtx; - private transient VectorAssignRowSameBatch vectorAssignRowSameBatch; + private transient VectorAssignRow vectorAssignRow; private transient int numEntriesHashTable; @@ -822,9 +822,8 @@ protected void initializeOp(Configuration hconf) throws HiveException { vrbCtx = new VectorizedRowBatchCtx(); vrbCtx.init((StructObjectInspector) outputObjInspector, vOutContext.getScratchColumnTypeNames()); outputBatch = vrbCtx.createVectorizedRowBatch(); - vectorAssignRowSameBatch = new VectorAssignRowSameBatch(); - vectorAssignRowSameBatch.init((StructObjectInspector) outputObjInspector, vOutContext.getProjectedColumns()); - vectorAssignRowSameBatch.setOneBatch(outputBatch); + vectorAssignRow = new VectorAssignRow(); + vectorAssignRow.init((StructObjectInspector) outputObjInspector, vOutContext.getProjectedColumns()); } } catch (HiveException 
he) { @@ -911,11 +910,11 @@ private void writeSingleRow(VectorHashKeyWrapper kw, VectorAggregationBufferRow } else { // Output keys and aggregates into the output batch. for (int i = 0; i < outputKeyLength; ++i) { - vectorAssignRowSameBatch.assignRowColumn(outputBatch.size, fi++, + vectorAssignRow.assignRowColumn(outputBatch, outputBatch.size, fi++, keyWrappersBatch.getWritableKeyValue (kw, i, keyOutputWriters[i])); } for (int i = 0; i < aggregators.length; ++i) { - vectorAssignRowSameBatch.assignRowColumn(outputBatch.size, fi++, + vectorAssignRow.assignRowColumn(outputBatch, outputBatch.size, fi++, aggregators[i].evaluateOutput(agg.getAggregationBuffer(i))); } ++outputBatch.size; @@ -936,7 +935,7 @@ private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buff throws HiveException { int fi = outputKeyLength; // Start after group keys. for (int i = 0; i < aggregators.length; ++i) { - vectorAssignRowSameBatch.assignRowColumn(outputBatch.size, fi++, + vectorAssignRow.assignRowColumn(outputBatch, outputBatch.size, fi++, aggregators[i].evaluateOutput(agg.getAggregationBuffer(i))); } ++outputBatch.size; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java index 6bed52f..902a183 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java @@ -59,7 +59,7 @@ protected transient VectorizedRowBatch outputBatch; protected transient VectorizedRowBatch scratchBatch; // holds restored (from disk) big table rows - protected transient Map outputVectorAssignRowMap; + protected transient Map outputVectorAssignRowMap; protected transient VectorizedRowBatchCtx vrbCtx = null; @@ -100,7 +100,7 @@ public void initializeOp(Configuration hconf) throws HiveException { outputBatch = vrbCtx.createVectorizedRowBatch(); - outputVectorAssignRowMap = new HashMap(); + outputVectorAssignRowMap = new HashMap(); } /** @@ -109,15 +109,14 @@ public void initializeOp(Configuration hconf) throws HiveException { @Override protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException { Object[] values = (Object[]) row; - VectorAssignRowSameBatch va = outputVectorAssignRowMap.get(outputOI); + VectorAssignRow va = outputVectorAssignRowMap.get(outputOI); if (va == null) { - va = new VectorAssignRowSameBatch(); + va = new VectorAssignRow(); va.init((StructObjectInspector) outputOI, vOutContext.getProjectedColumns()); - va.setOneBatch(outputBatch); outputVectorAssignRowMap.put(outputOI, va); } - va.assignRow(outputBatch.size, values); + va.assignRow(outputBatch, outputBatch.size, values); ++outputBatch.size; if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index 622f777..127d03a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -180,7 +180,9 @@ protected Object _evaluate(Object row, int version) throws HiveException { joinValues[posBigTable] = vectorNodeEvaluators; // Filtering is handled in the input batch processing - filterMaps[posBigTable] = null; + if (filterMaps != null) { + filterMaps[posBigTable] = null; + } } @Override diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOuterFilteredOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOuterFilteredOperator.java index 509a43f..1c8911d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOuterFilteredOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOuterFilteredOperator.java @@ -45,7 +45,7 @@ private transient boolean firstBatch; - private transient VectorExtractRowDynBatch vectorExtractRowDynBatch; + private transient VectorExtractRow vectorExtractRow; protected transient Object[] singleRow; @@ -94,33 +94,28 @@ public void process(Object data, int tag) throws HiveException { } if (firstBatch) { - vectorExtractRowDynBatch = new VectorExtractRowDynBatch(); - vectorExtractRowDynBatch.init((StructObjectInspector) inputObjInspectors[0], vContext.getProjectedColumns()); + vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init((StructObjectInspector) inputObjInspectors[0], vContext.getProjectedColumns()); - singleRow = new Object[vectorExtractRowDynBatch.getCount()]; + singleRow = new Object[vectorExtractRow.getCount()]; firstBatch = false; } - - vectorExtractRowDynBatch.setBatchOnEntry(batch); - // VectorizedBatchUtil.debugDisplayBatch( batch, "VectorReduceSinkOperator processOp "); if (batch.selectedInUse) { int selected[] = batch.selected; for (int logical = 0 ; logical < batch.size; logical++) { int batchIndex = selected[logical]; - vectorExtractRowDynBatch.extractRow(batchIndex, singleRow); + vectorExtractRow.extractRow(batch, batchIndex, singleRow); super.process(singleRow, tag); } } else { for (int batchIndex = 0 ; batchIndex < batch.size; batchIndex++) { - vectorExtractRowDynBatch.extractRow(batchIndex, singleRow); + vectorExtractRow.extractRow(batch, batchIndex, singleRow); super.process(singleRow, tag); } } - - vectorExtractRowDynBatch.forgetBatchOnExit(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java index 033be38..fce6885 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java @@ -19,15 +19,321 @@ package org.apache.hadoop.hive.ql.exec.vector; import org.apache.hadoop.hive.ql.CompilationOpContext; -import org.apache.hadoop.hive.ql.exec.MapOperator; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.AbstractMapOperator; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; +import org.apache.hadoop.hive.ql.io.orc.OrcSerde; +import org.apache.hadoop.hive.ql.io.orc.OrcStruct; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; +import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.SerDeException; +import 
org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BinaryComparable; import org.apache.hadoop.io.Writable; -public class VectorMapOperator extends MapOperator { +public class VectorMapOperator extends AbstractMapOperator { private static final long serialVersionUID = 1L; + private transient HashMap fileToPartitionContextMap; + + private transient Operator oneRootOperator; + + private transient TypeInfo tableStructTypeInfo; + private transient StandardStructObjectInspector tableStandardStructObjectInspector; + + private transient TypeInfo[] tableRowTypeInfos; + + private transient VectorMapOperatorReadType currentReadType; + private transient PartitionContext currentPartContext; + + private transient int currentDataColumnCount; + + private transient DeserializeRead currentDeserializeRead; + private transient VectorDeserializeRow currentVectorDeserializeRow; + + private Deserializer currentPartDeserializer; + private StructObjectInspector currentPartRawRowObjectInspector; + private VectorAssignRow currentVectorAssign; + + private transient VectorizedRowBatchCtx batchContext; + private transient VectorizedRowBatch vectorizedInputFileFormatBatch; + private transient VectorizedRowBatch batch; + private transient long batchCounter; + + private transient int dataColumnCount; + private transient int partitionColumnCount; + private transient Object[] partitionValues; + + private transient boolean[] columnsToIncludeTruncated; + + protected abstract class PartitionContext { + + protected final PartitionDesc partDesc; + + String tableName; + String partName; + + private PartitionContext(PartitionDesc partDesc) { + this.partDesc = partDesc; + + TableDesc td = partDesc.getTableDesc(); + + // Use table properties in case of unpartitioned tables, + // and the union of table properties and partition properties, with partition + // taking precedence, in the case of partitioned tables + Properties overlayedProps = + SerDeUtils.createOverlayedProperties(td.getProperties(), partDesc.getProperties()); + + Map partSpec = partDesc.getPartSpec(); + + tableName = String.valueOf(overlayedProps.getProperty("name")); + partName = String.valueOf(partSpec); + + } + + public PartitionDesc getPartDesc() { + return partDesc; + } + + public abstract void init(Configuration hconf) + throws SerDeException, Exception; + } + + protected class VectorizedInputFileFormatPartitionContext extends PartitionContext { + + private VectorizedInputFileFormatPartitionContext(PartitionDesc partDesc) { + super(partDesc); + } + + public void init(Configuration hconf) { + } + } + + protected class VectorDeserializePartitionContext extends PartitionContext { + + // This helper object deserializes known deserialization 
/ input file format combination into + // columns of a row in a vectorized row batch. + private VectorDeserializeRow vectorDeserializeRow; + + private DeserializeRead deserializeRead; + + private VectorDeserializePartitionContext(PartitionDesc partDesc) { + super(partDesc); + } + + public VectorDeserializeRow getVectorDeserializeRow() { + return vectorDeserializeRow; + } + + DeserializeRead getDeserializeRead() { + return deserializeRead; + } + + public void init(Configuration hconf) + throws SerDeException, HiveException { + VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); + + // This type information specifies the data types the partition needs to read. + TypeInfo[] dataTypeInfos = vectorPartDesc.getTypeInfos(); + + boolean[] conversionFlags = vectorPartDesc.getConversionFlags(); + + switch (vectorPartDesc.getVectorDeserializeType()) { + case LAZY_SIMPLE: + { + LazySerDeParameters simpleSerdeParams = + new LazySerDeParameters(hconf, partDesc.getTableDesc().getProperties(), + LazySimpleSerDe.class.getName()); + + // The LazySimple deserialization will fill in the type we want for the table. + // Hence, we pass tableRowPrimitiveTypeInfos. + LazySimpleDeserializeRead lazySimpleDeserializeRead = + new LazySimpleDeserializeRead(dataTypeInfos, simpleSerdeParams); + + vectorDeserializeRow = + new VectorDeserializeRow(lazySimpleDeserializeRead); + + deserializeRead = lazySimpleDeserializeRead; + } + break; + + case LAZY_BINARY: + { + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + new LazyBinaryDeserializeRead(dataTypeInfos); + + vectorDeserializeRow = + new VectorDeserializeRow(lazyBinaryDeserializeRead); + + deserializeRead = lazyBinaryDeserializeRead; + } + break; + + default: + throw new RuntimeException( + "Unexpected vector deserialize row type " + vectorPartDesc.getVectorDeserializeType().name()); + } + + // Initialize with data type conversion parameters. + vectorDeserializeRow.init(dataTypeInfos, tableRowTypeInfos, conversionFlags, + columnsToIncludeTruncated); + } + } + + protected class RowDeserializePartitionContext extends PartitionContext { + + private Deserializer partDeserializer; + private StructObjectInspector partRawRowObjectInspector; + private VectorAssignRow vectorAssign; + + private RowDeserializePartitionContext(PartitionDesc partDesc) { + super(partDesc); + } + + public Deserializer getPartDeserializer() { + return partDeserializer; + } + + public StructObjectInspector getPartRawRowObjectInspector() { + return partRawRowObjectInspector; + } + + public VectorAssignRow getVectorAssign() { + return vectorAssign; + } + + public void init(Configuration hconf) + throws Exception { + VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); + + partDeserializer = partDesc.getDeserializer(hconf); + + // UNDONE: Stop using the columns / columns.types variables. + if (partDeserializer instanceof OrcSerde) { + + // It turns out the columns / columns.types environment variables we set up instruct + // ORC to use the table schema even when hive.exec.schema.evolution is OFF... + partRawRowObjectInspector = + (StructObjectInspector) OrcStruct.createObjectInspector(tableStructTypeInfo); + + } else { + partRawRowObjectInspector = + (StructObjectInspector) partDeserializer.getObjectInspector(); + } + + boolean[] conversionFlags= vectorPartDesc.getConversionFlags(); + + TypeInfo[] dataTypeInfos = vectorPartDesc.getTypeInfos(); + + vectorAssign = new VectorAssignRow(); + + // Initialize with data type conversion parameters. 
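/*
 * Illustrative sketch only (not part of this patch): how the deserializer and VectorAssignRow
 * initialized in this partition context are later driven per input record by
 * VectorMapOperator.process (see the ROW_DESERIALIZE case further below). The names mirror the
 * fields of this class; "value" stands for the incoming Writable and "batch" for the operator's
 * current VectorizedRowBatch.
 *
 *   Object deserialized = partDeserializer.deserialize(value);
 *   vectorAssign.assignRow(batch, batch.size, deserialized, partRawRowObjectInspector);
 *   batch.size++;
 */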
+ vectorAssign.init(dataTypeInfos, tableRowTypeInfos, conversionFlags, + columnsToIncludeTruncated); + } + } + + public PartitionContext CreateAndInitPartitionContext(PartitionDesc partDesc, + Configuration hconf) + throws SerDeException, Exception { + + VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); + if (vectorPartDesc == null) { + LOG.info("debug"); + } + PartitionContext partitionContext; + VectorMapOperatorReadType vectorMapOperatorReadType = + vectorPartDesc.getVectorMapOperatorReadType(); + switch (vectorMapOperatorReadType) { + case VECTORIZED_INPUT_FILE_FORMAT: + partitionContext = new VectorizedInputFileFormatPartitionContext(partDesc); + break; + + case VECTOR_DESERIALIZE: + partitionContext = new VectorDeserializePartitionContext(partDesc); + break; + + case ROW_DESERIALIZE: + partitionContext = new RowDeserializePartitionContext(partDesc); + break; + + default: + throw new RuntimeException("Unexpected vector MapOperator read type " + + vectorMapOperatorReadType.name()); + } + + partitionContext.init(hconf); + + return partitionContext; + } + + private void determineColumnsToInclude(Configuration hconf) { + + columnsToIncludeTruncated = null; + + List columnsToIncludeTruncatedList = ColumnProjectionUtils.getReadColumnIDs(hconf); + if (columnsToIncludeTruncatedList != null && + columnsToIncludeTruncatedList.size() > 0 && columnsToIncludeTruncatedList.size() < dataColumnCount ) { + + // Partitioned columns will not be in the include list. + + boolean[] columnsToInclude = new boolean[dataColumnCount]; + Arrays.fill(columnsToInclude, false); + for (int columnNum : columnsToIncludeTruncatedList) { + columnsToInclude[columnNum] = true; + } + + // Work backwards to find the highest wanted column. + + int highestWantedColumnNum = -1; + for (int i = dataColumnCount - 1; i >= 0; i--) { + if (columnsToInclude[i]) { + highestWantedColumnNum = i; + break; + } + } + if (highestWantedColumnNum == -1) { + throw new RuntimeException("No columns to include?"); + } + int newColumnCount = highestWantedColumnNum + 1; + if (newColumnCount == dataColumnCount) { + columnsToIncludeTruncated = columnsToInclude; + } else { + columnsToIncludeTruncated = Arrays.copyOf(columnsToInclude, newColumnCount); + } + } + } + /** Kryo ctor. */ protected VectorMapOperator() { super(); @@ -37,29 +343,344 @@ public VectorMapOperator(CompilationOpContext ctx) { super(ctx); } + // Create a file to VectorFileContext map. + // Where VectorFileContext describes how to process "rows" (could be VRBs). + // + @Override + public void setChildren(Configuration hconf) throws Exception { + + // Get the one TableScanOperator. + oneRootOperator = conf.getAliasToWork().values().iterator().next(); + + currentReadType = VectorMapOperatorReadType.NONE; + + determineColumnsToInclude(hconf); + + // UNDONE: Put this in the MapWork to make it available to Pass-Thru VectorizedInputFileFormat + // UNDONE: readers. + // UNDONE: + batchContext = conf.getVectorizedRowBatchCtx(); + + // For now, use separate batches.... 
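/*
 * Worked example (illustrative only, not part of this patch) of the columnsToIncludeTruncated
 * computation in determineColumnsToInclude above, under assumed inputs: with dataColumnCount = 5
 * and a read-column list of {0, 2}, the include flags are {true, false, true, false, false};
 * the highest wanted column is 2, so the truncated array becomes {true, false, true}.
 *
 *   int dataColumnCount = 5;
 *   List<Integer> readColumnIds = Arrays.asList(0, 2);          // assumed projection
 *   boolean[] columnsToInclude = new boolean[dataColumnCount];
 *   for (int columnNum : readColumnIds) {
 *     columnsToInclude[columnNum] = true;
 *   }
 *   int highestWantedColumnNum = 2;                             // last true flag, found scanning backwards
 *   boolean[] columnsToIncludeTruncated =
 *       Arrays.copyOf(columnsToInclude, highestWantedColumnNum + 1);
 */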
+ vectorizedInputFileFormatBatch = batchContext.createVectorizedRowBatch(columnsToIncludeTruncated); + conf.setVectorizedRowBatch(vectorizedInputFileFormatBatch); + + batch = batchContext.createVectorizedRowBatch(columnsToIncludeTruncated); + + batchCounter = 0; + + dataColumnCount = batchContext.getDataColumnCount(); + partitionColumnCount = batchContext.getPartitionColumnCount(); + partitionValues = new Object[partitionColumnCount]; + + // Create table related objects + tableStructTypeInfo = + TypeInfoFactory.getStructTypeInfo( + Arrays.asList(batchContext.getRowColumnNames()), + Arrays.asList(batchContext.getRowColumnTypeInfos())); + tableStandardStructObjectInspector = + (StandardStructObjectInspector) + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo); + + tableRowTypeInfos = batchContext.getRowColumnTypeInfos(); + + // The Vectorizer class enforces that there is only one TableScanOperator, so + // we don't need the more complicated multiple root operator mapping that MapOperator has. + + fileToPartitionContextMap = new HashMap(); + + // Temporary map so we only create one partition context entry. + HashMap partitionContextMap = + new HashMap(); + + for (Map.Entry> entry : conf.getPathToAliases().entrySet()) { + String path = entry.getKey(); + PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path); + ArrayList aliases = entry.getValue(); + + VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); + if (vectorPartDesc == null) { + LOG.info("Ignoring path " + path + " with aliases " + aliases + " since it does not have vector partition descriptor? Did not get examined by the Vectorizer class"); + continue; // UNDONE: Need to understand what the new PartitionDesc is about... + } + LOG.info("VectorMapOperator path: " + path + ", read type " + vectorPartDesc.getVectorMapOperatorReadType().name() + + ", vector deserialize type " + vectorPartDesc.getVectorDeserializeType().name() + ", aliases " + aliases); + + PartitionContext partitionContext; + if (!partitionContextMap.containsKey(partDesc)) { + partitionContext = CreateAndInitPartitionContext(partDesc, hconf); + partitionContextMap.put(partDesc, partitionContext); + } else { + partitionContext = partitionContextMap.get(partDesc); + } + + fileToPartitionContextMap.put(path, partitionContext); + } + + // Create list of one. 
+ List> children = + new ArrayList>(); + children.add(oneRootOperator); + + setChildOperators(children); + } + + @Override + public void initializeMapOperator(Configuration hconf) throws HiveException { + super.initializeMapOperator(hconf); + + oneRootOperator.initialize(hconf, new ObjectInspector[] {tableStandardStructObjectInspector}); + } + + public void initializeContexts() throws HiveException { + Path fpath = getExecContext().getCurrentInputPath(); + String nominalPath = getNominalPath(fpath); + setupPartitionContextVars(nominalPath); + } + + // Find context for current input file + @Override + public void cleanUpInputFileChangedOp() throws HiveException { + super.cleanUpInputFileChangedOp(); + Path fpath = getExecContext().getCurrentInputPath(); + String nominalPath = getNominalPath(fpath); + + setupPartitionContextVars(nominalPath); + + // Add alias, table name, and partitions to hadoop conf so that their + // children will inherit these + oneRootOperator.setInputContext(currentPartContext.tableName, + currentPartContext.partName); + } + + private void setupPartitionContextVars(String nominalPath) throws HiveException { + + currentPartContext = fileToPartitionContextMap.get(nominalPath); + PartitionDesc partDesc = currentPartContext.getPartDesc(); + VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); + currentReadType = vectorPartDesc.getVectorMapOperatorReadType(); + + if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) { + + // We will get a pass-thru VectorizedRowBatch as a row from the reader. + + // The reader is responsible for setting the partition columns, resetting the batch, etc. + + currentDataColumnCount = 0; + + currentDeserializeRead = null; + currentVectorDeserializeRow = null; + + currentPartDeserializer = null; + currentPartRawRowObjectInspector = null; + currentVectorAssign = null; + + } else { + + // We will get "regular" rows from the reader. + + if (batch.size > 0) { + + // Clear out any rows in the batch from previous partition since we are going to change + // the repeating partition column values. + + batchCounter++; + // VectorizedBatchUtil.debugDisplayBatch(batch, "*DEBUG TAG* setupPartitionContextVars batch.size > 0 (batchCounter " + batchCounter + ", getOperatorName " + getOperatorName() + ")"); + oneRootOperator.process(batch, 0); + batch.reset(); + if (oneRootOperator.getDone()) { + setDone(true); + return; + } + + } + + currentDataColumnCount = vectorPartDesc.getNonPartColumnCount(); + + if (currentDataColumnCount < dataColumnCount) { + + // Default any additional data columns to NULL once for the file. + + for (int i = currentDataColumnCount; i < dataColumnCount; i++) { + ColumnVector colVector = batch.cols[i]; + colVector.isNull[0] = true; + colVector.noNulls = false; + colVector.isRepeating = true; + } + } + + if (batchContext.getPartitionColumnCount() > 0) { + + // The partition columns are set once for the partition and are marked repeating.
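/*
 * Illustrative sketch only (not part of this patch): what "repeating" means for a column
 * vector. A partition column that has one value for the entire batch is stored once at
 * index 0, which is why the extract/assign helpers above compute
 * adjustedIndex = colVector.isRepeating ? 0 : batchIndex. For a hypothetical long-typed
 * partition column:
 *
 *   LongColumnVector colVector = (LongColumnVector) batch.cols[partitionColumnIndex];
 *   colVector.isRepeating = true;
 *   colVector.noNulls = true;
 *   colVector.vector[0] = partitionValueAsLong;   // the single value covers the whole batch
 */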
+ + VectorizedRowBatchCtx.getPartitionValues(batchContext, partDesc, partitionValues); + batchContext.addPartitionColsToBatch(batch, partitionValues); + } + + switch (currentReadType) { + case VECTOR_DESERIALIZE: + { + VectorDeserializePartitionContext vectorDeserPartContext = + (VectorDeserializePartitionContext) currentPartContext; + + currentDeserializeRead = vectorDeserPartContext.getDeserializeRead(); + currentVectorDeserializeRow = vectorDeserPartContext.getVectorDeserializeRow(); + + currentPartDeserializer = null; + currentPartRawRowObjectInspector = null; + currentVectorAssign = null; + + } + break; + + case ROW_DESERIALIZE: + { + RowDeserializePartitionContext rowDeserPartContext = + (RowDeserializePartitionContext) currentPartContext; + + currentDeserializeRead = null; + currentVectorDeserializeRow = null; + + currentPartDeserializer = rowDeserPartContext.getPartDeserializer(); + currentPartRawRowObjectInspector = rowDeserPartContext.getPartRawRowObjectInspector(); + currentVectorAssign = rowDeserPartContext.getVectorAssign(); + } + break; + + default: + throw new RuntimeException("Unexpected vector MapOperator read type " + + currentReadType.name()); + } + } + } + + @Override + public Deserializer getCurrentDeserializer() { + // Not applicable. + return null; + } + @Override public void process(Writable value) throws HiveException { + // A mapper can span multiple files/partitions. - // The serializers need to be reset if the input file changed + // The PartitionContext needs to be changed if the input file changed ExecMapperContext context = getExecContext(); if (context != null && context.inputFileChanged()) { // The child operators cleanup if input file has changed cleanUpInputFileChanged(); } - // The row has been converted to comply with table schema, irrespective of partition schema. - // So, use tblOI (and not partOI) for forwarding - try { - int childrenDone = 0; - for (MapOpCtx current : currentCtxs) { - if (!current.forward(value)) { - childrenDone++; + if (!oneRootOperator.getDone()) { + try { + if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) { + + // We pass thru the VectorizedRowBatch as a row. + + batchCounter++; + // VectorizedBatchUtil.debugDisplayBatch((VectorizedRowBatch) value, "*DEBUG TAG* process(Writable value) VECTORIZED_INPUT_FILE_FORMAT (batchCounter " + batchCounter + ", getOperatorName " + getOperatorName() + ")"); + oneRootOperator.process(value, 0); + if (oneRootOperator.getDone()) { + setDone(true); + return; + } + + } else { + + // We have "regular" rows from the reader. Flush the batch if it is full. + + if (batch.size == batch.DEFAULT_SIZE) { + + // Feed full batch to operator tree. + batchCounter++; + // VectorizedBatchUtil.debugDisplayBatch(batch, "*DEBUG TAG* process(Writable value) batch.size == batch.DEFAULT_SIZE (batchCounter " + batchCounter + ", getOperatorName " + getOperatorName() + ")"); + oneRootOperator.process(batch, 0); + + /** + * Only reset the current data columns. Not any data columns defaulted to NULL + * because they are not present in the partition, and not partition columns.
+ */ + for (int c = 0; c < currentDataColumnCount; c++) { + batch.cols[c].reset(); + batch.cols[c].init(); + } + batch.selectedInUse = false; + batch.size = 0; + batch.endOfFile = false; + + if (oneRootOperator.getDone()) { + setDone(true); + return; + } + } + + switch (currentReadType) { + case VECTOR_DESERIALIZE: + { + BinaryComparable binComp = (BinaryComparable) value; + currentDeserializeRead.set(binComp.getBytes(), 0, binComp.getLength()); + + currentVectorDeserializeRow.deserializeByValue(batch, batch.size); + } + break; + + case ROW_DESERIALIZE: + { + Object deserialized = currentPartDeserializer.deserialize(value); + currentVectorAssign.assignRow(batch, batch.size, deserialized, + currentPartRawRowObjectInspector); + } + break; + + default: + throw new RuntimeException("Unexpected vector MapOperator read type " + + currentReadType.name()); + } + batch.size++; } + } catch (Exception e) { + throw new HiveException("Hive Runtime Error while processing row ", e); } + } + } + + @Override + public void process(Object row, int tag) throws HiveException { + throw new HiveException("Hive 2 Internal error: should not be called!"); + } - rowsForwarded(childrenDone, ((VectorizedRowBatch)value).size); - } catch (Exception e) { - throw new HiveException("Hive Runtime Error while processing row ", e); + @Override + public void closeOp(boolean abort) throws HiveException { + if (!abort && oneRootOperator != null && !oneRootOperator.getDone() && + currentReadType != VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) { + if (batch.size > 0) { + batchCounter++; + // VectorizedBatchUtil.debugDisplayBatch(batch, "*DEBUG TAG* closeOp batch.size > 0 (batchCounter " + batchCounter + ", getOperatorName " + getOperatorName() + ")"); + oneRootOperator.process(batch, 0); + batch.size = 0; + } } + super.closeOp(abort); + } + + @Override + public String getName() { + return getOperatorName(); + } + + static public String getOperatorName() { + return "MAP"; + } + + @Override + public OperatorType getType() { + return null; + } + + @Override + public void initEmptyInputChildren(List> children, + Configuration hconf) throws SerDeException, Exception { + // UNDONE ???? 
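/*
 * Illustrative recap (simplified, not part of this patch) of the row-accumulation path in
 * process(Writable value) above: rows are deserialized into position batch.size until the
 * batch is full, at which point the batch is forwarded and only the data columns actually
 * read for this partition are reset (NULL-defaulted columns and partition columns keep
 * their repeating values).
 *
 *   if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
 *     oneRootOperator.process(batch, 0);                 // feed the full batch downstream
 *     for (int c = 0; c < currentDataColumnCount; c++) {
 *       batch.cols[c].reset();                           // only the currently-read data columns
 *     }
 *     batch.size = 0;
 *   }
 *   // ... deserialize the incoming value into row position batch.size, then batch.size++
 */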
+ // TODO Auto-generated method stub } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java index b79a3d8..e45d31c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java @@ -41,7 +41,7 @@ private transient boolean firstBatch; - private transient VectorExtractRowDynBatch vectorExtractRowDynBatch; + private transient VectorExtractRow vectorExtractRow; protected transient Object[] singleRow; @@ -81,32 +81,28 @@ public void process(Object data, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) data; if (firstBatch) { - vectorExtractRowDynBatch = new VectorExtractRowDynBatch(); - vectorExtractRowDynBatch.init((StructObjectInspector) inputObjInspectors[0], vContext.getProjectedColumns()); + vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init((StructObjectInspector) inputObjInspectors[0], vContext.getProjectedColumns()); - singleRow = new Object[vectorExtractRowDynBatch.getCount()]; + singleRow = new Object[vectorExtractRow.getCount()]; firstBatch = false; } - vectorExtractRowDynBatch.setBatchOnEntry(batch); - // VectorizedBatchUtil.debugDisplayBatch( batch, "VectorReduceSinkOperator processOp "); if (batch.selectedInUse) { int selected[] = batch.selected; for (int logical = 0 ; logical < batch.size; logical++) { int batchIndex = selected[logical]; - vectorExtractRowDynBatch.extractRow(batchIndex, singleRow); + vectorExtractRow.extractRow(batch, batchIndex, singleRow); super.process(singleRow, tag); } } else { for (int batchIndex = 0 ; batchIndex < batch.size; batchIndex++) { - vectorExtractRowDynBatch.extractRow(batchIndex, singleRow); + vectorExtractRow.extractRow(batch, batchIndex, singleRow); super.process(singleRow, tag); } } - - vectorExtractRowDynBatch.forgetBatchOnExit(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index 9a263e6..e1ca139 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -76,7 +76,7 @@ private transient VectorHashKeyWrapperBatch keyWrapperBatch; - private transient Map outputVectorAssignRowMap; + private transient Map outputVectorAssignRowMap; private transient int batchIndex = -1; @@ -158,7 +158,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions); - outputVectorAssignRowMap = new HashMap(); + outputVectorAssignRowMap = new HashMap(); // This key evaluator translates from the vectorized VectorHashKeyWrapper format // into the row-mode MapJoinKey @@ -286,15 +286,14 @@ public void closeOp(boolean aborted) throws HiveException { @Override protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException { Object[] values = (Object[]) row; - VectorAssignRowSameBatch va = outputVectorAssignRowMap.get(outputOI); + VectorAssignRow va = outputVectorAssignRowMap.get(outputOI); if (va == null) { - va = new VectorAssignRowSameBatch(); + va = new VectorAssignRow(); va.init((StructObjectInspector) outputOI, vOutContext.getProjectedColumns()); - va.setOneBatch(outputBatch); outputVectorAssignRowMap.put(outputOI, va); } - 
va.assignRow(outputBatch.size, values); + va.assignRow(outputBatch, outputBatch.size, values); ++outputBatch.size; if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSparkHashTableSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSparkHashTableSinkOperator.java index 1e550e7..55e6e8b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSparkHashTableSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSparkHashTableSinkOperator.java @@ -47,7 +47,7 @@ private transient boolean firstBatch; - private transient VectorExtractRowDynBatch vectorExtractRowDynBatch; + private transient VectorExtractRow vectorExtractRow; protected transient Object[] singleRow; @@ -82,28 +82,26 @@ public void process(Object row, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) row; if (firstBatch) { - vectorExtractRowDynBatch = new VectorExtractRowDynBatch(); - vectorExtractRowDynBatch.init((StructObjectInspector) inputObjInspectors[0], vContext.getProjectedColumns()); + vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init((StructObjectInspector) inputObjInspectors[0], vContext.getProjectedColumns()); - singleRow = new Object[vectorExtractRowDynBatch.getCount()]; + singleRow = new Object[vectorExtractRow.getCount()]; firstBatch = false; } - vectorExtractRowDynBatch.setBatchOnEntry(batch); + if (batch.selectedInUse) { int selected[] = batch.selected; for (int logical = 0 ; logical < batch.size; logical++) { int batchIndex = selected[logical]; - vectorExtractRowDynBatch.extractRow(batchIndex, singleRow); + vectorExtractRow.extractRow(batch, batchIndex, singleRow); super.process(singleRow, tag); } } else { for (int batchIndex = 0 ; batchIndex < batch.size; batchIndex++) { - vectorExtractRowDynBatch.extractRow(batchIndex, singleRow); + vectorExtractRow.extractRow(batch, batchIndex, singleRow); super.process(singleRow, tag); } } - - vectorExtractRowDynBatch.forgetBatchOnExit(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSparkPartitionPruningSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSparkPartitionPruningSinkOperator.java index 2f02250..bc2efec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSparkPartitionPruningSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSparkPartitionPruningSinkOperator.java @@ -43,7 +43,7 @@ protected transient boolean firstBatch; - protected transient VectorExtractRowDynBatch vectorExtractRowDynBatch; + protected transient VectorExtractRow vectorExtractRow; protected transient Object[] singleRow; @@ -77,27 +77,24 @@ public void initializeOp(Configuration hconf) throws HiveException { public void process(Object data, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) data; if (firstBatch) { - vectorExtractRowDynBatch = new VectorExtractRowDynBatch(); - vectorExtractRowDynBatch.init((StructObjectInspector) inputObjInspectors[0], + vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init((StructObjectInspector) inputObjInspectors[0], vContext.getProjectedColumns()); - singleRow = new Object[vectorExtractRowDynBatch.getCount()]; + singleRow = new Object[vectorExtractRow.getCount()]; firstBatch = false; } - vectorExtractRowDynBatch.setBatchOnEntry(batch); ObjectInspector rowInspector = inputObjInspectors[0]; try { Writable writableRow; for (int logical = 0; logical < batch.size; 
logical++) { int batchIndex = batch.selectedInUse ? batch.selected[logical] : logical; - vectorExtractRowDynBatch.extractRow(batchIndex, singleRow); + vectorExtractRow.extractRow(batch, batchIndex, singleRow); writableRow = serializer.serialize(singleRow, rowInspector); writableRow.write(buffer); } } catch (Exception e) { throw new HiveException(e); } - - vectorExtractRowDynBatch.forgetBatchOnExit(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 30a0f5a..ba72228 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -68,13 +68,11 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopLong; @@ -170,10 +168,6 @@ public VectorizationContext(String contextName, List initialColumnNames) { this.contextName = contextName; level = 0; - if (LOG.isDebugEnabled()) { - LOG.debug("VectorizationContext consructor contextName " + contextName + " level " - + level + " initialColumnNames " + initialColumnNames); - } this.initialColumnNames = initialColumnNames; this.projectionColumnNames = initialColumnNames; @@ -194,9 +188,6 @@ public VectorizationContext(String contextName, List initialColumnNames) public VectorizationContext(String contextName) { this.contextName = contextName; level = 0; - if (LOG.isDebugEnabled()) { - LOG.debug("VectorizationContext consructor contextName " + contextName + " level " + level); - } initialColumnNames = new ArrayList(); projectedColumns = new ArrayList(); projectionColumnNames = new ArrayList(); @@ -212,7 +203,6 @@ public VectorizationContext(String contextName) { public VectorizationContext(String contextName, VectorizationContext vContext) { this.contextName = contextName; level = vContext.level + 1; - LOG.info("VectorizationContext consructor reference contextName " + contextName + " level " + level); this.initialColumnNames = vContext.initialColumnNames; this.projectedColumns = new ArrayList(); this.projectionColumnNames = new ArrayList(); @@ -484,7 +474,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th throw new HiveException("Could not vectorize expression: "+exprDesc.getName()); } if (LOG.isDebugEnabled()) { - LOG.debug("Input 
Expression = " + exprDesc.getTypeInfo() + LOG.debug("Input Expression = " + exprDesc.toString() + ", Vectorized Expression = " + ve.toString()); } return ve; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 46a5413..cfedf35 100755 --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -205,8 +205,8 @@ public void configure(JobConf job) { if (!HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon())) { return inputFormat; // LLAP not enabled, no-op. } - boolean isSupported = inputFormat instanceof LlapWrappableInputFormatInterface, - isVectorized = Utilities.isVectorMode(conf); + boolean isSupported = inputFormat instanceof LlapWrappableInputFormatInterface; + boolean isVectorized = Utilities.getUseVectorizedInputFileFormat(conf); if (!isSupported || !isVectorized) { LOG.info("Not using llap for " + inputFormat + ": supported = " + isSupported + ", vectorized = " + isVectorized); @@ -225,7 +225,7 @@ public void configure(JobConf job) { } public static boolean canWrapAnyForLlap(Configuration conf, MapWork mapWork) { - return Utilities.isVectorMode(conf, mapWork); + return Utilities.getUseVectorizedInputFileFormat(conf, mapWork); } public static boolean canWrapForLlap(Class inputFormatClass) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java index c53d149..80858a9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java @@ -72,7 +72,7 @@ public DummyInputSplit(String path) { private boolean addPartitionCols = true; public NullRowsRecordReader(Configuration conf, InputSplit split) throws IOException { - boolean isVectorMode = Utilities.isVectorMode(conf); + boolean isVectorMode = Utilities.getUseVectorizedInputFileFormat(conf); if (LOG.isDebugEnabled()) { LOG.debug("Null record reader in " + (isVectorMode ? 
"" : "non-") + "vector mode"); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index fe0be7b..3cdd79b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -452,7 +452,7 @@ public boolean validateInput(FileSystem fs, HiveConf conf, List files ) throws IOException { - if (Utilities.isVectorMode(conf)) { + if (Utilities.getUseVectorizedInputFileFormat(conf)) { return new VectorizedOrcInputFormat().validateInput(fs, conf, files); } @@ -1640,7 +1640,7 @@ private static void scheduleSplits(ETLSplitStrategy splitStrategy, Context conte public org.apache.hadoop.mapred.RecordReader getRecordReader(InputSplit inputSplit, JobConf conf, Reporter reporter) throws IOException { - boolean vectorMode = Utilities.isVectorMode(conf); + boolean vectorMode = Utilities.getUseVectorizedInputFileFormat(conf); boolean isAcidRead = isAcidRead(conf, inputSplit); if (!isAcidRead) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java index a4e35cb..5b65e5c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java @@ -59,7 +59,7 @@ protected MapredParquetInputFormat(final ParquetInputFormat input final org.apache.hadoop.mapred.Reporter reporter ) throws IOException { try { - if (Utilities.isVectorMode(job)) { + if (Utilities.getUseVectorizedInputFileFormat(job)) { if (LOG.isDebugEnabled()) { LOG.debug("Using vectorized record reader"); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index d806b97..8976ce1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.orc.OrcSerde; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; @@ -113,6 +114,7 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; @@ -159,8 +161,12 @@ import org.apache.hadoop.hive.ql.udf.generic.*; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -168,6 +174,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.mapred.SequenceFileInputFormat; +import org.apache.hadoop.mapred.TextInputFormat; import com.google.common.base.Joiner; @@ -339,6 +347,7 @@ public Vectorizer() { List columnNames; List typeInfos; int partitionColumnCount; + boolean useVectorizedInputFileFormat; String[] scratchTypeNameArray; @@ -360,7 +369,9 @@ public void setPartitionColumnCount(int partitionColumnCount) { public void setScratchTypeNameArray(String[] scratchTypeNameArray) { this.scratchTypeNameArray = scratchTypeNameArray; } - + public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { + this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + } public void setNonVectorizedOps(Set> nonVectorizedOps) { this.nonVectorizedOps = nonVectorizedOps; } @@ -381,6 +392,8 @@ public void transferToBaseWork(BaseWork baseWork) { partitionColumnCount, scratchTypeNameArray); baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx); + + baseWork.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); } } @@ -497,30 +510,107 @@ private String getColumns(List columnNames, int start, int length, return Joiner.on(separator).join(columnNames.subList(start, start + length)); } - private String getTypes(List typeInfos, int start, int length) { - return TypeInfoUtils.getTypesString(typeInfos.subList(start, start + length)); - } + private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable, + boolean useVectorizedInputFileFormat, boolean useVectorDeserialize, + boolean useRowDeserialize) { - private boolean verifyAndSetVectorPartDesc(PartitionDesc pd) { + String inputFileFormatClassName = pd.getInputFileFormatClassName(); // Look for Pass-Thru case where InputFileFormat has VectorizedInputFormatInterface // and reads VectorizedRowBatch as a "row". - if (Utilities.isInputFileFormatVectorized(pd)) { + if (useVectorizedInputFileFormat) { + if (Utilities.isInputFileFormatVectorized(pd)) { + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorizedInputFileFormat(inputFileFormatClassName)); + + return true; + } + } + + /** + * When the table is ACID, then we can only vectorize using the vectorized input file format... + */ + if (isAcidTable) { + + LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized"); + + return false; + } + + String deserializerClassName = pd.getDeserializerClassName(); + + // Look for InputFileFormat / Serde combinations we can deserialize more efficiently + // using VectorDeserializeRow and a deserialize class with the DeserializeRead interface. + // + // Do the "vectorized" row-by-row deserialization into a VectorizedRowBatch in the + // VectorMapOperator.
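For readers following the new verifyAndSetVectorPartDesc logic, the sketch below restates the flag-and-format decision order as a self-contained method. It only illustrates the behavior described by the comments and code in this hunk, it is not code from the patch; the class name ReadTypeSketch, the method chooseReadType, and its parameter list are invented for the example, while the flag semantics and the supported InputFormat/SerDe pairs come from the diff itself.

public class ReadTypeSketch {

  enum ReadType { VECTORIZED_INPUT_FILE_FORMAT, VECTOR_DESERIALIZE, ROW_DESERIALIZE, NONE }

  static ReadType chooseReadType(boolean inputFormatIsVectorized, boolean isAcidTable,
      String inputFileFormatClassName, String deserializerClassName,
      boolean useVectorizedInputFileFormat, boolean useVectorDeserialize,
      boolean useRowDeserialize) {
    // 1. Pass-through: the InputFormat itself produces VectorizedRowBatch "rows".
    if (useVectorizedInputFileFormat && inputFormatIsVectorized) {
      return ReadType.VECTORIZED_INPUT_FILE_FORMAT;
    }
    // ACID tables can only be vectorized through the vectorized input file format.
    if (isAcidTable) {
      return ReadType.NONE;
    }
    // 2. Row-by-row vector deserialization for the two supported InputFormat / SerDe pairs.
    if (useVectorDeserialize) {
      if ("org.apache.hadoop.mapred.TextInputFormat".equals(inputFileFormatClassName)
          && "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe".equals(deserializerClassName)) {
        return ReadType.VECTOR_DESERIALIZE;   // LAZY_SIMPLE
      }
      if ("org.apache.hadoop.mapred.SequenceFileInputFormat".equals(inputFileFormatClassName)
          && "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe".equals(deserializerClassName)) {
        return ReadType.VECTOR_DESERIALIZE;   // LAZY_BINARY
      }
    }
    // 3. Fallback: deserialize with the regular SerDe and assign Object[] rows into the batch.
    if (useRowDeserialize) {
      return ReadType.ROW_DESERIALIZE;
    }
    return ReadType.NONE;
  }

  public static void main(String[] args) {
    // A TEXTFILE table with LazySimpleSerDe, vector deserialize enabled: prints VECTOR_DESERIALIZE.
    System.out.println(chooseReadType(false, false,
        "org.apache.hadoop.mapred.TextInputFormat",
        "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
        true, true, false));
  }
}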
- pd.setVectorPartitionDesc(VectorPartitionDesc.createVectorizedInputFileFormat()); + if (useVectorDeserialize) { + + // Currently, we support LazySimple deserialization: + // + // org.apache.hadoop.mapred.TextInputFormat + // org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + // + // AND + // + // org.apache.hadoop.mapred.SequenceFileInputFormat + // org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + if (inputFileFormatClassName.equals(TextInputFormat.class.getName()) && + deserializerClassName.equals(LazySimpleSerDe.class.getName())) { + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorDeserialize( + inputFileFormatClassName, VectorDeserializeType.LAZY_SIMPLE)); + + return true; + } else if (inputFileFormatClassName.equals(SequenceFileInputFormat.class.getName()) && + deserializerClassName.equals(LazyBinarySerDe.class.getName())) { + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorDeserialize( + inputFileFormatClassName, VectorDeserializeType.LAZY_BINARY)); + + return true; + } + } + + // Otherwise, if enabled, deserialize rows using regular Serde and add the object + // inspect-able Object[] row to a VectorizedRowBatch in the VectorMapOperator. + + if (useRowDeserialize) { + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createRowDeserialize( + inputFileFormatClassName, deserializerClassName)); return true; + } - LOG.info("Input format: " + pd.getInputFileFormatClassName() - + ", doesn't provide vectorized input"); + LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized"); return false; } private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String alias, - TableScanOperator tableScanOperator, VectorTaskColumnInfo vectorTaskColumnInfo) { + TableScanOperator tableScanOperator, VectorTaskColumnInfo vectorTaskColumnInfo) + throws SemanticException { + + boolean isAcidTable = tableScanOperator.getConf().isAcidTable(); + + boolean useVectorizedInputFileFormat = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT); + boolean useVectorDeserialize = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE); + boolean useRowDeserialize = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE); // These names/types are the data columns plus partition columns. final List allColumnNameList = new ArrayList(); @@ -561,27 +651,13 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al // We seen this already. continue; } - if (!verifyAndSetVectorPartDesc(partDesc)) { + if (!verifyAndSetVectorPartDesc(partDesc, isAcidTable, useVectorizedInputFileFormat, + useVectorDeserialize, useRowDeserialize)) { return false; } VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); - LOG.info("Vectorizer path: " + path + ", read type " + - vectorPartDesc.getVectorMapOperatorReadType().name() + ", aliases " + aliases); - - Properties partProps = partDesc.getProperties(); - - String nextDataColumnsString = - partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS); - String[] nextDataColumns = nextDataColumnsString.split(","); - - String nextDataTypesString = - partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES); - - // We convert to an array of TypeInfo using a library routine since it parses the information - // and can handle use of different separators, etc. 
We cannot use the raw type string - // for comparison in the map because of the different separators used. - List nextDataTypeInfoList = - TypeInfoUtils.getTypeInfosFromTypeString(nextDataTypesString); + LOG.info("Vectorizer path: " + path + ", " + vectorPartDesc.toString() + + ", aliases " + aliases); if (isFirst) { @@ -608,66 +684,104 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al isFirst = false; } - ImmutablePair columnNamesAndTypesCombination = - new ImmutablePair(nextDataColumnsString, nextDataTypeInfoList); + Deserializer deserializer; + try { + deserializer = partDesc.getDeserializer(hiveConf); + } catch (Exception e) { + throw new SemanticException(e); + } - boolean[] conversionFlags; - if (conversionMap.containsKey(columnNamesAndTypesCombination)) { + List nextDataTypeInfoList; + // UNDONE: Don't use columns / columns.types in ORC. + if (deserializer instanceof OrcSerde) { - conversionFlags = conversionMap.get(columnNamesAndTypesCombination); + // It turns out the columns / columns.types environment variables we set up instruct + // ORC to use the table schema even when hive.exec.schema.evolution is OFF... + nextDataTypeInfoList = dataTypeInfoList; } else { - List nextDataColumnList = Arrays.asList(nextDataColumns); + // Get the column and types from the SerDe to let it decide what is available + // (e.g. Avro provides the table schema and ignores the partition schema..). + // + StructObjectInspector partObjectInspector; + try { + partObjectInspector = (StructObjectInspector) deserializer.getObjectInspector(); + } catch (Exception e) { + throw new SemanticException(e); + } + String nextDataColumnsString = ObjectInspectorUtils.getFieldNames(partObjectInspector); + String nextDataTypesString = ObjectInspectorUtils.getFieldTypes(partObjectInspector); - // Validate the column names that are present are the same. Missing columns will be - // implicitly defaulted to null. + String[] nextDataColumns = nextDataColumnsString.split(","); - if (nextDataColumnList.size() > dataColumnList.size()) { - LOG.info( - String.format("Could not vectorize partition %s. The partition column names %d is greater than the number of table columns %d", - path, nextDataColumnList.size(), dataColumnList.size())); - return false; - } - for (int i = 0; i < nextDataColumnList.size(); i++) { - String nextColumnName = nextDataColumnList.get(i); - String tableColumnName = dataColumnList.get(i); - if (!nextColumnName.equals(tableColumnName)) { + // We convert to an array of TypeInfo using a library routine since it parses the information + // and can handle use of different separators, etc. We cannot use the raw type string + // for comparison in the map because of the different separators used. + nextDataTypeInfoList = + TypeInfoUtils.getTypeInfosFromTypeString(nextDataTypesString); + + ImmutablePair columnNamesAndTypesCombination = + new ImmutablePair(nextDataColumnsString, nextDataTypeInfoList); + + boolean[] conversionFlags; + if (conversionMap.containsKey(columnNamesAndTypesCombination)) { + + conversionFlags = conversionMap.get(columnNamesAndTypesCombination); + + } else { + + List nextDataColumnList = Arrays.asList(nextDataColumns); + + // Validate the column names that are present are the same. Missing columns will be + // implicitly defaulted to null. + + if (nextDataColumnList.size() > dataColumnList.size()) { LOG.info( - String.format("Could not vectorize partition %s. 
The partition column name %s is does not match table column name %s", - path, nextColumnName, tableColumnName)); + String.format("Could not vectorize partition %s. The number of partition columns %d is greater than the number of table columns %d", + path, nextDataColumnList.size(), dataColumnList.size())); return false; } - } + for (int i = 0; i < nextDataColumnList.size(); i++) { + String nextColumnName = nextDataColumnList.get(i); + String tableColumnName = dataColumnList.get(i); + if (!nextColumnName.equals(tableColumnName)) { + LOG.info( + String.format("Could not vectorize partition %s. The partition column name %s does not match the table column name %s", + path, nextColumnName, tableColumnName)); + return false; + } + } - // The table column types might have been changed with ALTER. There are restrictions - // here for vectorization. + // The table column types might have been changed with ALTER. There are restrictions + // here for vectorization. - // Some readers / deserializers take responsibility for conversion themselves. + // Some readers / deserializers take responsibility for conversion themselves. - // If we need to check for conversion, the conversion object may come back null - // indicating from a vectorization point of view the conversion is implicit. That is, - // all implicit integer upgrades. + // If we need to check for conversion, the conversion object may come back null + // indicating from a vectorization point of view the conversion is implicit. That is, + // all implicit integer upgrades. - if (vectorPartDesc.getNeedsDataTypeConversionCheck() && - !nextDataTypeInfoList.equals(dataTypeInfoList)) { + if (vectorPartDesc.getNeedsDataTypeConversionCheck() && + !nextDataTypeInfoList.equals(dataTypeInfoList)) { - // The results will be in 2 members: validConversion and conversionFlags - partitionConversion.validateConversion(nextDataTypeInfoList, dataTypeInfoList); - if (!partitionConversion.getValidConversion()) { - return false; - } - conversionFlags = partitionConversion.getResultConversionFlags(); - } else { - conversionFlags = null; - } + // The results will be in 2 members: validConversion and conversionFlags + partitionConversion.validateConversion(nextDataTypeInfoList, dataTypeInfoList); + if (!partitionConversion.getValidConversion()) { + return false; + } + conversionFlags = partitionConversion.getResultConversionFlags(); + } else { + conversionFlags = null; + } - // We enter this in our map so we don't have to check again for subsequent partitions.
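As an aside, the partition-versus-table column validation in the loop above boils down to a positional prefix check. The minimal sketch below shows the same rule in isolation; the class and method names are illustrative and not part of the patch.

import java.util.Arrays;
import java.util.List;

public class PartitionColumnCheckSketch {

  // A partition's data columns must match the table's data columns by position; the partition
  // may have fewer columns (they are read as NULL), but not more, and no name may differ.
  static boolean partitionColumnsMatchTable(List<String> partitionCols, List<String> tableCols) {
    if (partitionCols.size() > tableCols.size()) {
      return false;   // more partition data columns than table columns
    }
    for (int i = 0; i < partitionCols.size(); i++) {
      if (!partitionCols.get(i).equals(tableCols.get(i))) {
        return false; // positional name mismatch
      }
    }
    return true;      // trailing missing columns default to NULL
  }

  public static void main(String[] args) {
    System.out.println(partitionColumnsMatchTable(
        Arrays.asList("a", "b"), Arrays.asList("a", "b", "c", "d")));   // true
    System.out.println(partitionColumnsMatchTable(
        Arrays.asList("a", "x"), Arrays.asList("a", "b", "c", "d")));   // false
  }
}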
- conversionMap.put(columnNamesAndTypesCombination, conversionFlags); - } + conversionMap.put(columnNamesAndTypesCombination, conversionFlags); + } - vectorPartDesc.setConversionFlags(conversionFlags); + vectorPartDesc.setConversionFlags(conversionFlags); + } vectorPartDesc.setTypeInfos(nextDataTypeInfoList); } @@ -675,6 +789,7 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al vectorTaskColumnInfo.setColumnNames(allColumnNameList); vectorTaskColumnInfo.setTypeInfos(allTypeInfoList); vectorTaskColumnInfo.setPartitionColumnCount(partitionColumnCount); + vectorTaskColumnInfo.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); return true; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index 429a058..92aa9e3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -68,6 +68,8 @@ public BaseWork(String name) { protected VectorizedRowBatchCtx vectorizedRowBatchCtx; + protected boolean useVectorizedInputFileFormat; + protected boolean llapMode = false; protected boolean uberMode = false; @@ -166,6 +168,14 @@ public void setVectorizedRowBatchCtx(VectorizedRowBatchCtx vectorizedRowBatchCtx this.vectorizedRowBatchCtx = vectorizedRowBatchCtx; } + public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { + this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + } + + public boolean getUseVectorizedInputFileFormat() { + return useVectorizedInputFileFormat; + } + // ----------------------------------------------------------------------------------------------- /** diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 0851d9e..f034812 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; @@ -130,6 +131,8 @@ private boolean doSplitsGrouping = true; + private VectorizedRowBatch vectorizedRowBatch; + // bitsets can't be correctly serialized by Kryo's default serializer // BitSet::wordsInUse is transient, so force dumping into a lower form private byte[] includedBuckets; @@ -635,4 +638,12 @@ public void setIncludedBuckets(BitSet includedBuckets) { // see comment next to the field this.includedBuckets = includedBuckets.toByteArray(); } + + public void setVectorizedRowBatch(VectorizedRowBatch vectorizedRowBatch) { + this.vectorizedRowBatch = vectorizedRowBatch; + } + + public VectorizedRowBatch getVectorizedRowBatch() { + return vectorizedRowBatch; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java index 8fe298d..e7352cf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java @@ -58,24 +58,64 @@ public TypeInfo getInvalidToTypeInfo() { } // Currently, we only support these no-precision-loss or 
promotion data type conversions: - // // - // Short -> Int IMPLICIT WITH VECTORIZATION - // Short -> BigInt IMPLICIT WITH VECTORIZATION - // Int --> BigInt IMPLICIT WITH VECTORIZATION // - // CONSIDER ADDING: - // Float -> Double IMPLICIT WITH VECTORIZATION - // (Char | VarChar) -> String IMPLICIT WITH VECTORIZATION + // TinyInt --> SmallInt + // TinyInt --> Int + // TinyInt --> BigInt // - private static HashMap validFromPrimitiveMap = + // SmallInt -> Int + // SmallInt -> BigInt + // + // Int --> BigInt + // + // Float -> Double + // + // (Char | VarChar) -> String + // + private static HashMap implicitPrimitiveMap = new HashMap(); static { - validFromPrimitiveMap.put( + implicitPrimitiveMap.put( + PrimitiveCategory.BYTE, + new PrimitiveCategory[] { PrimitiveCategory.SHORT, PrimitiveCategory.INT, PrimitiveCategory.LONG }); + implicitPrimitiveMap.put( PrimitiveCategory.SHORT, new PrimitiveCategory[] { PrimitiveCategory.INT, PrimitiveCategory.LONG }); - validFromPrimitiveMap.put( + implicitPrimitiveMap.put( PrimitiveCategory.INT, new PrimitiveCategory[] { PrimitiveCategory.LONG }); + implicitPrimitiveMap.put( + PrimitiveCategory.FLOAT, + new PrimitiveCategory[] { PrimitiveCategory.DOUBLE }); + implicitPrimitiveMap.put( + PrimitiveCategory.CHAR, + new PrimitiveCategory[] { PrimitiveCategory.STRING }); + implicitPrimitiveMap.put( + PrimitiveCategory.VARCHAR, + new PrimitiveCategory[] { PrimitiveCategory.STRING }); + } + + public static boolean isImplicitVectorColumnConversion(TypeInfo fromTypeInfo, + TypeInfo toTypeInfo) { + + if (fromTypeInfo.getCategory() == Category.PRIMITIVE && + toTypeInfo.getCategory() == Category.PRIMITIVE) { + + PrimitiveCategory fromPrimitiveCategory = + ((PrimitiveTypeInfo) fromTypeInfo).getPrimitiveCategory(); + PrimitiveCategory toPrimitiveCategory = + ((PrimitiveTypeInfo) toTypeInfo).getPrimitiveCategory(); + PrimitiveCategory[] toPrimitiveCategories = implicitPrimitiveMap.get(fromPrimitiveCategory); + if (toPrimitiveCategories != null) { + for (PrimitiveCategory candidatePrimitiveCategory : toPrimitiveCategories) { + if (candidatePrimitiveCategory == toPrimitiveCategory) { + return true; + } + } + } + return false; + } + return false; } private boolean validateOne(TypeInfo fromTypeInfo, TypeInfo toTypeInfo) { @@ -87,24 +127,13 @@ private boolean validateOne(TypeInfo fromTypeInfo, TypeInfo toTypeInfo) { if (fromTypeInfo.getCategory() == Category.PRIMITIVE && toTypeInfo.getCategory() == Category.PRIMITIVE) { - PrimitiveCategory fromPrimitiveCategory = ((PrimitiveTypeInfo) fromTypeInfo).getPrimitiveCategory(); - PrimitiveCategory toPrimitiveCategory = ((PrimitiveTypeInfo) toTypeInfo).getPrimitiveCategory(); - - PrimitiveCategory[] toPrimitiveCategories = - validFromPrimitiveMap.get(fromPrimitiveCategory); - if (toPrimitiveCategories == null || - !ArrayUtils.contains(toPrimitiveCategories, toPrimitiveCategory)) { - invalidFromTypeInfo = fromTypeInfo; - invalidToTypeInfo = toTypeInfo; + // For now, allow all conversions.... - // Tell caller a bad one was found. - validConversion = false; - return false; - } } else { // Ignore checking complex types. Assume they will not be included in the query. } + // The 2 TypeInfo are different. return true; } @@ -117,7 +146,6 @@ public void validateConversion(List fromTypeInfoList, // The method validateOne will turn this off when invalid conversion is found. 
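Assuming the patched VectorPartitionConversion class above is on the classpath, the new isImplicitVectorColumnConversion helper can be exercised as follows; the wrapping class and the chosen type pairs are illustrative only, not part of the patch or its tests.

import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ImplicitConversionSketch {
  public static void main(String[] args) {
    // Widening integer and float conversions are implicit for vectorization.
    System.out.println(VectorPartitionConversion.isImplicitVectorColumnConversion(
        TypeInfoFactory.shortTypeInfo, TypeInfoFactory.longTypeInfo));   // true
    System.out.println(VectorPartitionConversion.isImplicitVectorColumnConversion(
        TypeInfoFactory.floatTypeInfo, TypeInfoFactory.doubleTypeInfo)); // true
    // Unrelated or narrowing conversions are not in the implicit map.
    System.out.println(VectorPartitionConversion.isImplicitVectorColumnConversion(
        TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo));   // false
  }
}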
validConversion = true; - boolean atLeastOneConversion = false; for (int i = 0; i < columnCount; i++) { TypeInfo fromTypeInfo = fromTypeInfoList.get(i); TypeInfo toTypeInfo = toTypeInfoList.get(i); @@ -127,12 +155,6 @@ public void validateConversion(List fromTypeInfoList, return; } } - - if (atLeastOneConversion) { - // Leave resultConversionFlags set. - } else { - resultConversionFlags = null; - } } public void validateConversion(TypeInfo[] fromTypeInfos, TypeInfo[] toTypeInfos) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java index 45151f2..7004577 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java @@ -39,44 +39,137 @@ // No data type conversion check? Assume ALTER TABLE prevented conversions that // VectorizedInputFileFormat cannot handle... // + // VECTOR_DESERIALIZE: + // LAZY_SIMPLE: + // Capable of converting on its own. + // LAZY_BINARY + // Partition schema assumed to match file contents. + // Conversion necessary from partition field values to vector columns. + // ROW_DESERIALIZE + // Partition schema assumed to match file contents. + // Conversion necessary from partition field values to vector columns. + // public static enum VectorMapOperatorReadType { NONE, - VECTORIZED_INPUT_FILE_FORMAT + VECTORIZED_INPUT_FILE_FORMAT, + VECTOR_DESERIALIZE, + ROW_DESERIALIZE } + public static enum VectorDeserializeType { + NONE, + LAZY_SIMPLE, + LAZY_BINARY + } private final VectorMapOperatorReadType vectorMapOperatorReadType; + private final VectorDeserializeType vectorDeserializeType; private final boolean needsDataTypeConversionCheck; + private final String rowDeserializerClassName; + private final String inputFileFormatClassName; + private boolean[] conversionFlags; private TypeInfo[] typeInfos; - private VectorPartitionDesc(VectorMapOperatorReadType vectorMapOperatorReadType, - boolean needsDataTypeConversionCheck) { + private VectorPartitionDesc(String inputFileFormatClassName, + VectorMapOperatorReadType vectorMapOperatorReadType, boolean needsDataTypeConversionCheck) { this.vectorMapOperatorReadType = vectorMapOperatorReadType; + this.vectorDeserializeType = VectorDeserializeType.NONE; this.needsDataTypeConversionCheck = needsDataTypeConversionCheck; + this.inputFileFormatClassName = inputFileFormatClassName; + rowDeserializerClassName = null; + conversionFlags = null; + typeInfos = null; + } + /** + * Create a VECTOR_DESERIALIZE flavor object. + * @param vectorMapOperatorReadType + * @param vectorDeserializeType + * @param needsDataTypeConversionCheck + */ + private VectorPartitionDesc(String inputFileFormatClassName, + VectorDeserializeType vectorDeserializeType, boolean needsDataTypeConversionCheck) { + this.vectorMapOperatorReadType = VectorMapOperatorReadType.VECTOR_DESERIALIZE; + this.vectorDeserializeType = vectorDeserializeType; + this.needsDataTypeConversionCheck = needsDataTypeConversionCheck; + this.inputFileFormatClassName = inputFileFormatClassName; + rowDeserializerClassName = null; + conversionFlags = null; + typeInfos = null; + } + + /** + * Create a ROW_DESERIALIZE flavor object. 
+ * @param rowDeserializerClassName + * @param inputFileFormatClassName + */ + private VectorPartitionDesc(String inputFileFormatClassName, String rowDeserializerClassName) { + this.vectorMapOperatorReadType = VectorMapOperatorReadType.ROW_DESERIALIZE; + this.vectorDeserializeType = VectorDeserializeType.NONE; + this.needsDataTypeConversionCheck = true; + this.inputFileFormatClassName = inputFileFormatClassName; + this.rowDeserializerClassName = rowDeserializerClassName; conversionFlags = null; typeInfos = null; } - public static VectorPartitionDesc createVectorizedInputFileFormat() { - return new VectorPartitionDesc(VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT, true); + public static VectorPartitionDesc createVectorizedInputFileFormat(String inputFileFormatClassName) { + return new VectorPartitionDesc(inputFileFormatClassName, + VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT, true); } + public static VectorPartitionDesc createVectorDeserialize(String inputFileFormatClassName, + VectorDeserializeType vectorDeserializeType) { + boolean needsDataTypeConversionCheck; + switch (vectorDeserializeType) { + case LAZY_SIMPLE: + needsDataTypeConversionCheck = false; + break; + case LAZY_BINARY: + needsDataTypeConversionCheck = true; + break; + default: + throw new RuntimeException("Unexpected vector deserialize type " + + vectorDeserializeType.name()); + } + return new VectorPartitionDesc(inputFileFormatClassName, vectorDeserializeType, + needsDataTypeConversionCheck); + } + + public static VectorPartitionDesc createRowDeserialize(String inputFileFormatClassName, + String rowDeserializerClassName) { + return new VectorPartitionDesc(rowDeserializerClassName, inputFileFormatClassName); + } @Override public VectorPartitionDesc clone() { - VectorPartitionDesc result = - new VectorPartitionDesc(vectorMapOperatorReadType, - needsDataTypeConversionCheck); + VectorPartitionDesc result; + switch (vectorMapOperatorReadType) { + case VECTORIZED_INPUT_FILE_FORMAT: + result = new VectorPartitionDesc(inputFileFormatClassName, vectorMapOperatorReadType, + needsDataTypeConversionCheck); + break; + case VECTOR_DESERIALIZE: + result = new VectorPartitionDesc(inputFileFormatClassName, vectorDeserializeType, + needsDataTypeConversionCheck); + break; + case ROW_DESERIALIZE: + result = new VectorPartitionDesc(inputFileFormatClassName, rowDeserializerClassName); + break; + default: + throw new RuntimeException("Unexpected vector map operator read type " + vectorMapOperatorReadType.name()); + } + result.conversionFlags = (conversionFlags == null ? 
null : Arrays.copyOf(conversionFlags, conversionFlags.length)); result.typeInfos = Arrays.copyOf(typeInfos, typeInfos.length); + return result; } @@ -84,6 +177,17 @@ public VectorMapOperatorReadType getVectorMapOperatorReadType() { return vectorMapOperatorReadType; } + public String getInputFileFormatClassName() { + return inputFileFormatClassName; + } + + public VectorDeserializeType getVectorDeserializeType() { + return vectorDeserializeType; + } + + public String getRowDeserializerClassName() { + return rowDeserializerClassName; + } public boolean getNeedsDataTypeConversionCheck() { return needsDataTypeConversionCheck; } @@ -107,4 +211,28 @@ public void setTypeInfos(List typeInfoList) { public int getNonPartColumnCount() { return typeInfos.length; } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("vector map operator read type "); + sb.append(vectorMapOperatorReadType.name()); + sb.append(", input file format class name "); + sb.append(inputFileFormatClassName); + switch (vectorMapOperatorReadType) { + case VECTORIZED_INPUT_FILE_FORMAT: + break; + case VECTOR_DESERIALIZE: + sb.append(", deserialize type "); + sb.append(vectorDeserializeType.name()); + break; + case ROW_DESERIALIZE: + sb.append(", deserializer class name "); + sb.append(rowDeserializerClassName); + break; + default: + throw new RuntimeException("Unexpected vector map operator read type " + vectorMapOperatorReadType.name()); + } + return sb.toString(); + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java index a5946d1..959a2af 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java @@ -34,13 +34,13 @@ */ public class TestVectorRowObject extends TestCase { - void examineBatch(VectorizedRowBatch batch, VectorExtractRowSameBatch vectorExtractRow, + void examineBatch(VectorizedRowBatch batch, VectorExtractRow vectorExtractRow, Object[][] randomRows, int firstRandomRowIndex ) { int rowSize = vectorExtractRow.getCount(); Object[] row = new Object[rowSize]; for (int i = 0; i < batch.size; i++) { - vectorExtractRow.extractRow(i, row); + vectorExtractRow.extractRow(batch, i, row); Object[] expectedRow = randomRows[firstRandomRowIndex + i]; for (int c = 0; c < rowSize; c++) { if (!row[c].equals(expectedRow[c])) { @@ -67,20 +67,18 @@ void testVectorRowObject(int caseNum, Random r) throws HiveException { cv.noNulls = false; } - VectorAssignRowSameBatch vectorAssignRow = new VectorAssignRowSameBatch(); + VectorAssignRow vectorAssignRow = new VectorAssignRow(); vectorAssignRow.init(source.typeNames()); - vectorAssignRow.setOneBatch(batch); - - VectorExtractRowSameBatch vectorExtractRow = new VectorExtractRowSameBatch(); + + VectorExtractRow vectorExtractRow = new VectorExtractRow(); vectorExtractRow.init(source.typeNames()); - vectorExtractRow.setOneBatch(batch); Object[][] randomRows = source.randomRows(100000); int firstRandomRowIndex = 0; for (int i = 0; i < randomRows.length; i++) { Object[] row = randomRows[i]; - vectorAssignRow.assignRow(batch.size, row); + vectorAssignRow.assignRow(batch, batch.size, row); batch.size++; if (batch.size == batch.DEFAULT_SIZE) { examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java index 7c0c8d1..2a33780 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java @@ -331,9 +331,8 @@ void testVectorSerializeRow(int caseNum, Random r, SerializationType serializati batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames); VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); - VectorAssignRowSameBatch vectorAssignRow = new VectorAssignRowSameBatch(); + VectorAssignRow vectorAssignRow = new VectorAssignRow(); vectorAssignRow.init(source.typeNames()); - vectorAssignRow.setOneBatch(batch); int fieldCount = source.typeNames().size(); DeserializeRead deserializeRead; @@ -369,7 +368,7 @@ void testVectorSerializeRow(int caseNum, Random r, SerializationType serializati for (int i = 0; i < randomRows.length; i++) { Object[] row = randomRows[i]; - vectorAssignRow.assignRow(batch.size, row); + vectorAssignRow.assignRow(batch, batch.size, row); batch.size++; if (batch.size == batch.DEFAULT_SIZE) { serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex); @@ -382,13 +381,13 @@ void testVectorSerializeRow(int caseNum, Random r, SerializationType serializati } } - void examineBatch(VectorizedRowBatch batch, VectorExtractRowSameBatch vectorExtractRow, + void examineBatch(VectorizedRowBatch batch, VectorExtractRow vectorExtractRow, Object[][] randomRows, int firstRandomRowIndex ) { int rowSize = vectorExtractRow.getCount(); Object[] row = new Object[rowSize]; for (int i = 0; i < batch.size; i++) { - vectorExtractRow.extractRow(i, row); + vectorExtractRow.extractRow(batch, i, row); Object[] expectedRow = randomRows[firstRandomRowIndex + i]; @@ -603,9 +602,8 @@ void testVectorDeserializeRow(int caseNum, Random r, SerializationType serializa cv.noNulls = false; } - VectorExtractRowSameBatch vectorExtractRow = new VectorExtractRowSameBatch(); + VectorExtractRow vectorExtractRow = new VectorExtractRow(); vectorExtractRow.init(source.typeNames()); - vectorExtractRow.setOneBatch(batch); Object[][] randomRows = source.randomRows(100000); int firstRandomRowIndex = 0; diff --git ql/src/test/queries/clientpositive/avro_schema_evolution_native.q ql/src/test/queries/clientpositive/avro_schema_evolution_native.q index b32e1ec..efeb167 100644 --- ql/src/test/queries/clientpositive/avro_schema_evolution_native.q +++ ql/src/test/queries/clientpositive/avro_schema_evolution_native.q @@ -1,3 +1,4 @@ +set hive.cli.print.header=true; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS -- Verify that table scans work with partitioned Avro tables @@ -19,6 +20,7 @@ STORED AS AVRO; SET hive.exec.dynamic.partition.mode=nonstrict; INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes; +DESCRIBE FORMATTED episodes_partitioned; ALTER TABLE episodes_partitioned SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' @@ -51,7 +53,12 @@ SERDEPROPERTIES ('avro.schema.literal'='{ } ] }'); +DESCRIBE FORMATTED episodes_partitioned; +set hive.fetch.task.conversion=more; + +EXPLAIN +SELECT * FROM episodes_partitioned WHERE doctor_pt > 6; SELECT * FROM episodes_partitioned WHERE doctor_pt > 6; @@ -60,4 +67,15 @@ SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5; -- Fetch w/filter to specific partition SELECT * FROM episodes_partitioned WHERE doctor_pt = 6; -- Fetch w/non-existent 
partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5; + +set hive.fetch.task.conversion=none; + +EXPLAIN +SELECT * FROM episodes_partitioned WHERE doctor_pt > 6; + +SELECT * FROM episodes_partitioned WHERE doctor_pt > 6; + +SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5; +SELECT * FROM episodes_partitioned WHERE doctor_pt = 6; SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/bucket_groupby.q ql/src/test/queries/clientpositive/bucket_groupby.q index ea35bd7..a36c79d 100644 --- ql/src/test/queries/clientpositive/bucket_groupby.q +++ ql/src/test/queries/clientpositive/bucket_groupby.q @@ -1,3 +1,4 @@ +SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; create table clustergroupby(key string, value string) partitioned by(ds string); describe extended clustergroupby; @@ -6,16 +7,16 @@ alter table clustergroupby clustered by (key) into 1 buckets; insert overwrite table clustergroupby partition (ds='100') select key, value from src sort by key; explain -select key, count(1) from clustergroupby where ds='100' group by key limit 10; -select key, count(1) from clustergroupby where ds='100' group by key limit 10; +select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10; +select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10; describe extended clustergroupby; insert overwrite table clustergroupby partition (ds='101') select key, value from src distribute by key; --normal-- explain -select key, count(1) from clustergroupby where ds='101' group by key limit 10; -select key, count(1) from clustergroupby where ds='101' group by key limit 10; +select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10; +select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10; --function-- explain @@ -27,13 +28,13 @@ select abs(length(key)), count(1) from clustergroupby where ds='101' group by a --constant-- explain -select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10; -select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10; +select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10; +select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10; --subquery-- explain -select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10; -select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10; +select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10; +select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10; explain select key, count(1) from clustergroupby group by key; @@ -52,11 +53,11 @@ describe extended clustergroupby; insert overwrite table clustergroupby partition (ds='102') select key, value from src distribute by value sort by key, value; explain -select key, count(1) from clustergroupby where ds='102' group by key limit 10; -select key, count(1) from clustergroupby where ds='102' group by key limit 10; +select key, count(1) from clustergroupby where ds='102' group by key order by key limit 10; +select key, count(1) from clustergroupby 
where ds='102' group by key order by key limit 10; explain -select value, count(1) from clustergroupby where ds='102' group by value limit 10; -select value, count(1) from clustergroupby where ds='102' group by value limit 10; +select value, count(1) from clustergroupby where ds='102' group by value order by value limit 10; +select value, count(1) from clustergroupby where ds='102' group by value order by value limit 10; explain select key, count(1) from clustergroupby where ds='102' group by key, value limit 10; select key, count(1) from clustergroupby where ds='102' group by key, value limit 10; @@ -69,8 +70,8 @@ alter table clustergroupby clustered by (value, key) sorted by (key) into 1 buck describe extended clustergroupby; insert overwrite table clustergroupby partition (ds='103') select key, value from src distribute by value, key sort by key; explain -select key, count(1) from clustergroupby where ds='103' group by key limit 10; -select key, count(1) from clustergroupby where ds='103' group by key limit 10; +select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10; +select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10; explain -select key, count(1) from clustergroupby where ds='103' group by value, key limit 10; -select key, count(1) from clustergroupby where ds='103' group by value, key limit 10; +select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10; +select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10; diff --git ql/src/test/queries/clientpositive/groupby_sort_10.q ql/src/test/queries/clientpositive/groupby_sort_10.q index 910a272..3517693 100644 --- ql/src/test/queries/clientpositive/groupby_sort_10.q +++ ql/src/test/queries/clientpositive/groupby_sort_10.q @@ -2,6 +2,8 @@ set hive.mapred.mode=nonstrict; set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; +-- SORT_QUERY_RESULTS + CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; diff --git ql/src/test/queries/clientpositive/metadata_only_queries.q ql/src/test/queries/clientpositive/metadata_only_queries.q index cc72bb3..9f7a4b8 100644 --- ql/src/test/queries/clientpositive/metadata_only_queries.q +++ ql/src/test/queries/clientpositive/metadata_only_queries.q @@ -2,6 +2,9 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.compute.query.using.stats=true; set hive.stats.autogather=true; + +-- SORT_QUERY_RESULTS + create table over10k( t tinyint, si smallint, diff --git ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q index 95d033f..4084985 100644 --- ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q +++ ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q @@ -1,6 +1,9 @@ set hive.stats.dbclass=fs; set hive.compute.query.using.stats=true; set hive.explain.user=false; + +-- SORT_QUERY_RESULTS + create table over10k( t tinyint, si smallint, diff --git ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q index fc935d5..b0e57fb 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q @@ -4,6 +4,9 @@ set hive.support.concurrency=true; set 
hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; SET hive.exec.schema.evolution=false; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q index e49a0f3..ca6822c 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q @@ -3,6 +3,9 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; SET hive.exec.schema.evolution=false; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q index 6c256ea..f05f02a 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q @@ -1,7 +1,5 @@ set hive.cli.print.header=true; set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; - SET hive.vectorized.execution.enabled=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q index 30b19bb..da726c5 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q @@ -2,6 +2,9 @@ set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=more; set hive.exec.dynamic.partition.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q index 6df2095..393967f 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q @@ -1,8 +1,9 @@ set hive.cli.print.header=true; -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; - +SET hive.exec.schema.evolution=true; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q deleted 
file mode 100644 index 44f7264..0000000 --- ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q +++ /dev/null @@ -1,56 +0,0 @@ -set hive.cli.print.header=true; -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=false; -set hive.fetch.task.conversion=none; -set hive.exec.dynamic.partition.mode=nonstrict; - - --- SORT_QUERY_RESULTS --- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table --- --- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT ---- -CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; - -insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); - --- Table-Non-Cascade ADD COLUMNS ... -alter table table1 add columns(c int, d string); - -insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); - -insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); - --- SELECT permutation columns to make sure NULL defaulting works right -select a,b from table1; -select a,b,c from table1; -select a,b,c,d from table1; -select a,c,d from table1; -select a,d from table1; -select c from table1; -select d from table1; - --- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT --- smallint = (2-byte signed integer, from -32,768 to 32,767) --- -CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; - -insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - --- Table-Non-Cascade CHANGE COLUMNS ... -alter table table2 change column a a int; - -insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -insert into table table2 values(5000, 'new'),(90000, 'new'); - -select a,b from table2; - - -DROP TABLE table1; -DROP TABLE table2; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q deleted file mode 100644 index 44f7264..0000000 --- ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q +++ /dev/null @@ -1,56 +0,0 @@ -set hive.cli.print.header=true; -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=false; -set hive.fetch.task.conversion=none; -set hive.exec.dynamic.partition.mode=nonstrict; - - --- SORT_QUERY_RESULTS --- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table --- --- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT ---- -CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; - -insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); - --- Table-Non-Cascade ADD COLUMNS ... 
-alter table table1 add columns(c int, d string); - -insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); - -insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); - --- SELECT permutation columns to make sure NULL defaulting works right -select a,b from table1; -select a,b,c from table1; -select a,b,c,d from table1; -select a,c,d from table1; -select a,d from table1; -select c from table1; -select d from table1; - --- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT --- smallint = (2-byte signed integer, from -32,768 to 32,767) --- -CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; - -insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - --- Table-Non-Cascade CHANGE COLUMNS ... -alter table table2 change column a a int; - -insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -insert into table table2 values(5000, 'new'),(90000, 'new'); - -select a,b from table2; - - -DROP TABLE table1; -DROP TABLE table2; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q index 4d78642..e54bdec 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q +++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q @@ -8,7 +8,7 @@ set hive.exec.dynamic.partition.mode=nonstrict; -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q index 0834351..feb7d67 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q +++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q @@ -1,67 +1,54 @@ set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.enabled=false; set hive.fetch.task.conversion=more; +set hive.exec.dynamic.partition.mode=nonstrict; + -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); -select a,b from table1; - --- ADD COLUMNS +-- Table-Non-Cascade ADD COLUMNS ... 
alter table table1 add columns(c int, d string); insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); -select a,b,c,d from table1; - --- ADD COLUMNS -alter table table1 add columns(e string); - -insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2'); - -select a,b,c,d,e from table1; +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); +-- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE; - -insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - -select a,b from table3; - --- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int; - -insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -select a,b from table3; - --- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string); +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; -insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6'); +insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); -select a,b from table3; +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int; +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); --- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int; +insert into table table2 values(5000, 'new'),(90000, 'new'); -select a,b from table3; +select a,b from table2; DROP TABLE table1; DROP TABLE table2; -DROP TABLE table3; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_part.q index 173e417..d314906 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_part.q +++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_part.q @@ -8,7 +8,7 @@ set hive.exec.dynamic.partition.mode=nonstrict; -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_table.q index 83cab14..363b43c 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_table.q +++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_table.q @@ -1,67 +1,53 @@ set hive.cli.print.header=true; -SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.enabled=false; set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); -select a,b from table1; - --- ADD COLUMNS +-- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string); insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); -select a,b,c,d from table1; - --- ADD COLUMNS -alter table table1 add columns(e string); - -insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2'); - -select a,b,c,d,e from table1; +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); +-- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE; - -insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - -select a,b from table3; - --- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int; - -insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -select a,b from table3; - --- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string); +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; -insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6'); +insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); -select a,b from table3; +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int; +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); --- ADD COLUMNS ... 
RESTRICT -alter table table3 change column a a int; +insert into table table2 values(5000, 'new'),(90000, 'new'); -select a,b from table3; +select a,b from table2; DROP TABLE table1; -DROP TABLE table2; -DROP TABLE table3; \ No newline at end of file +DROP TABLE table2; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_part.q new file mode 100644 index 0000000..5e75c47 --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_part.q @@ -0,0 +1,106 @@ +set hive.mapred.mode=nonstrict; +set hive.cli.print.header=true; +SET hive.exec.schema.evolution=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=false; +SET hive.vectorized.use.vector.serde.deserialize=true; +SET hive.vectorized.use.row.serde.deserialize=false; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; +-- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string); + +insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1; + +select part,a,b from partitioned1; +select part,a,b,c from partitioned1; +select part,a,b,c,d from partitioned1; +select part,a,c,d from partitioned1; +select part,a,d from partitioned1; +select part,c from partitioned1; +select part,d from partitioned1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int; + +insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new'); + +select part,a,b from partitioned2; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned3 add columns(c int, d string); + +insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1); + +-- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1; +select part,a,b,c from partitioned1; +select part,a,b,c,d from partitioned1; +select part,a,c,d from partitioned1; +select part,a,d from partitioned1; +select part,c from partitioned1; +select part,d from partitioned1; + + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int; + +insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1); + +select part,a,b from partitioned4; + + +DROP TABLE partitioned1; +DROP TABLE partitioned2; +DROP TABLE partitioned3; +DROP TABLE partitioned4; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_table.q new file mode 100644 index 0000000..d09467a --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_table.q @@ -0,0 +1,62 @@ +set hive.cli.print.header=true; +SET hive.exec.schema.evolution=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=false; +SET hive.vectorized.use.vector.serde.deserialize=true; +SET hive.vectorized.use.row.serde.deserialize=false; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; +-- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; + +insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string); + +insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1; + +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; + +insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int; + +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table table2 values(5000, 'new'),(90000, 'new'); + +select a,b from table2; + + +DROP TABLE table1; +DROP TABLE table2; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_part.q new file mode 100644 index 0000000..02b5657 --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_part.q @@ -0,0 +1,141 @@ +set hive.mapred.mode=nonstrict; +set hive.cli.print.header=true; +SET hive.exec.schema.evolution=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=false; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=true; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + +-- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string); + +insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1; + +select part,a,b from partitioned1; +select part,a,b,c from partitioned1; +select part,a,b,c,d from partitioned1; +select part,a,c,d from partitioned1; +select part,a,d from partitioned1; +select part,c from partitioned1; +select part,d from partitioned1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int; + +insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new'); + +select part,a,b from partitioned2; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned3 add columns(c int, d string); + +insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1); + +-- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1; +select part,a,b,c from partitioned1; +select part,a,b,c,d from partitioned1; +select part,a,c,d from partitioned1; +select part,a,d from partitioned1; +select part,c from partitioned1; +select part,d from partitioned1; + + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int; + +insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1); + +select part,a,b from partitioned4; + + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- string to double +-- +CREATE TABLE partitioned5(a string, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned5 partition(part=1) values('1000', 'original'),('6737', 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned5 change column a a double; + +insert into table partitioned5 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table partitioned5 partition(part=1) values(5000, 'new'),(90000, 'new'); + +select part,a,b from partitioned5; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- string to double +-- +CREATE TABLE partitioned6(a STRING, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned6 partition(part=1) values('1000', 'original'),('6737', 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned6 change column a a double; + +insert into table partitioned6 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1); + +select part,a,b from partitioned6; + +DROP TABLE partitioned1; +DROP TABLE partitioned2; +DROP TABLE partitioned3; +DROP TABLE partitioned4; +DROP TABLE partitioned5; +DROP TABLE partitioned6; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_table.q new file mode 100644 index 0000000..b35cb6f --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_table.q @@ -0,0 +1,80 @@ +set hive.cli.print.header=true; +SET hive.exec.schema.evolution=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=false; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=true; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + + +-- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; + +insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string); + +insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1; + +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; + +insert into table table2 values(1000, 'original'),(6737, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int; + +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table table2 values(5000, 'new'),(90000, 'new'); + +select a,b from table2; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- string to int +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE; + +insert into table table3 values('1000', 'original'),('6737', 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table3 change column a a int; + +insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table table3 values(5000, 'new'),(90000, 'new'); + +select a,b from table3; + +DROP TABLE table1; +DROP TABLE table2; +DROP TABLE table3; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/tez_schema_evolution.q ql/src/test/queries/clientpositive/tez_schema_evolution.q index d855cd2..1d8f487 100644 --- ql/src/test/queries/clientpositive/tez_schema_evolution.q +++ ql/src/test/queries/clientpositive/tez_schema_evolution.q @@ -1,3 +1,4 @@ +SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS diff --git ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out index 98e541a..852a679 100644 --- ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out +++ ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out @@ -81,6 +81,44 @@ POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(ep POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +title air_date doctor doctor_pt +PREHOOK: query: DESCRIBE FORMATTED episodes_partitioned +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@episodes_partitioned +POSTHOOK: query: DESCRIBE FORMATTED episodes_partitioned +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@episodes_partitioned +col_name data_type comment +# col_name data_type comment + +title string episode title +air_date string initial date +doctor int main actor playing the Doctor in episode + +# Partition Information +# col_name data_type comment + +doctor_pt int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.avro.AvroSerDe +InputFormat: org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 PREHOOK: query: ALTER TABLE episodes_partitioned SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' WITH @@ -149,6 +187,69 @@ SERDEPROPERTIES ('avro.schema.literal'='{ POSTHOOK: type: ALTERTABLE_SERIALIZER POSTHOOK: Input: default@episodes_partitioned POSTHOOK: Output: default@episodes_partitioned +PREHOOK: query: DESCRIBE FORMATTED episodes_partitioned +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@episodes_partitioned +POSTHOOK: query: DESCRIBE FORMATTED episodes_partitioned +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@episodes_partitioned +col_name data_type comment +# col_name data_type comment + +title string episode title +air_date string initial date +doctor int main actor playing the Doctor in episode +value int default value + +# Partition Information 
+# col_name data_type comment + +doctor_pt int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.avro.AvroSerDe +InputFormat: org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + avro.schema.literal {\n \"namespace\": \"testing.hive.avro.serde\",\n \"name\": \"episodes\",\n \"type\": \"record\",\n \"fields\": [\n {\n \"name\":\"title\",\n \"type\":\"string\",\n \"doc\":\"episode title\"\n },\n {\n \"name\":\"air_date\",\n \"type\":\"string\",\n \"doc\":\"initial date\"\n },\n {\n \"name\":\"doctor\",\n \"type\":\"int\",\n \"doc\":\"main actor playing the Doctor in episode\"\n },\n {\n \"name\":\"value\",\n \"type\":\"int\",\n \"default\":0,\n \"doc\":\"default value\"\n }\n ]\n} + serialization.format 1 +PREHOOK: query: EXPLAIN +SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: episodes_partitioned + Statistics: Num rows: 3 Data size: 889 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: title (type: string), air_date (type: string), doctor (type: int), value (type: int), doctor_pt (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 889 Basic stats: COMPLETE Column stats: NONE + ListSink + PREHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 PREHOOK: type: QUERY PREHOOK: Input: default@episodes_partitioned @@ -161,6 +262,7 @@ POSTHOOK: Input: default@episodes_partitioned POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 #### A masked pattern was here #### +episodes_partitioned.title episodes_partitioned.air_date episodes_partitioned.doctor episodes_partitioned.value episodes_partitioned.doctor_pt Rose 26 March 2005 9 0 9 The Doctor's Wife 14 May 2011 11 0 11 The Eleventh Hour 3 April 2010 11 0 11 @@ -188,6 +290,7 @@ POSTHOOK: Input: default@episodes_partitioned@doctor_pt=5 POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 #### A masked pattern was here #### +episodes_partitioned.title episodes_partitioned.air_date episodes_partitioned.doctor episodes_partitioned.value episodes_partitioned.doctor_pt An Unearthly Child 23 November 1963 1 0 1 Horror of Fang Rock 3 September 1977 4 0 4 Rose 26 March 2005 9 0 9 @@ -205,6 +308,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@episodes_partitioned POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 #### A masked pattern was here #### +episodes_partitioned.title episodes_partitioned.air_date episodes_partitioned.doctor episodes_partitioned.value episodes_partitioned.doctor_pt The Mysterious Planet 6 September 1986 6 0 6 PREHOOK: query: -- Fetch w/non-existent partition SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 @@ -216,3 +320,105 @@ SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 
POSTHOOK: type: QUERY POSTHOOK: Input: default@episodes_partitioned #### A masked pattern was here #### +episodes_partitioned.title episodes_partitioned.air_date episodes_partitioned.doctor episodes_partitioned.value episodes_partitioned.doctor_pt +PREHOOK: query: EXPLAIN +SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: episodes_partitioned + Statistics: Num rows: 3 Data size: 889 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: title (type: string), air_date (type: string), doctor (type: int), value (type: int), doctor_pt (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 889 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 889 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=11 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +episodes_partitioned.title episodes_partitioned.air_date episodes_partitioned.doctor episodes_partitioned.value episodes_partitioned.doctor_pt +Rose 26 March 2005 9 0 9 +The Doctor's Wife 14 May 2011 11 0 11 +The Eleventh Hour 3 April 2010 11 0 11 +PREHOOK: query: SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=1 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=11 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=2 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=4 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=5 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=1 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=2 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=4 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=5 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +episodes_partitioned.title episodes_partitioned.air_date episodes_partitioned.doctor 
episodes_partitioned.value episodes_partitioned.doctor_pt +An Unearthly Child 23 November 1963 1 0 1 +Horror of Fang Rock 3 September 1977 4 0 4 +Rose 26 March 2005 9 0 9 +The Doctor's Wife 14 May 2011 11 0 11 +The Eleventh Hour 3 April 2010 11 0 11 +PREHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 +#### A masked pattern was here #### +episodes_partitioned.title episodes_partitioned.air_date episodes_partitioned.doctor episodes_partitioned.value episodes_partitioned.doctor_pt +The Mysterious Planet 6 September 1986 6 0 6 +PREHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +#### A masked pattern was here #### +episodes_partitioned.title episodes_partitioned.air_date episodes_partitioned.doctor episodes_partitioned.value episodes_partitioned.doctor_pt diff --git ql/src/test/results/clientpositive/bucket_groupby.q.out ql/src/test/results/clientpositive/bucket_groupby.q.out index d1414fe..ae736f9 100644 --- ql/src/test/results/clientpositive/bucket_groupby.q.out +++ ql/src/test/results/clientpositive/bucket_groupby.q.out @@ -41,14 +41,15 @@ POSTHOOK: Output: default@clustergroupby@ds=100 POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: explain -select key, count(1) from clustergroupby where ds='100' group by key limit 10 +select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain -select key, count(1) from clustergroupby where ds='100' group by key limit 10 +select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -72,7 +73,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -81,6 +81,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 
Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -98,12 +120,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select key, count(1) from clustergroupby where ds='100' group by key limit 10 +PREHOOK: query: select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@clustergroupby PREHOOK: Input: default@clustergroupby@ds=100 #### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='100' group by key limit 10 +POSTHOOK: query: select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=100 @@ -146,15 +168,16 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: --normal-- explain -select key, count(1) from clustergroupby where ds='101' group by key limit 10 +select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10 PREHOOK: type: QUERY POSTHOOK: query: --normal-- explain -select key, count(1) from clustergroupby where ds='101' group by key limit 10 +select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -178,7 +201,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -187,6 +209,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -204,12 +248,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select key, count(1) from clustergroupby where ds='101' group by 
key limit 10 +PREHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@clustergroupby PREHOOK: Input: default@clustergroupby@ds=101 #### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key limit 10 +POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=101 @@ -370,15 +414,16 @@ POSTHOOK: Input: default@clustergroupby@ds=101 3 416 PREHOOK: query: --constant-- explain -select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10 +select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 PREHOOK: type: QUERY POSTHOOK: query: --constant-- explain -select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10 +select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -402,7 +447,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), 3 (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -415,16 +459,38 @@ STAGE PLANS: expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), 3 (type: int) + sort order: ++ + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -432,12 +498,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: 
select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10 +PREHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@clustergroupby PREHOOK: Input: default@clustergroupby@ds=101 #### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10 +POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=101 @@ -454,15 +520,16 @@ POSTHOOK: Input: default@clustergroupby@ds=101 114 1 PREHOOK: query: --subquery-- explain -select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10 +select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10 PREHOOK: type: QUERY POSTHOOK: query: --subquery-- explain -select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10 +select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -486,7 +553,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -495,6 +561,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -512,12 +600,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10 +PREHOOK: query: select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@clustergroupby PREHOOK: Input: default@clustergroupby@ds=101 #### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10 
+POSTHOOK: query: select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=101 @@ -1020,14 +1108,15 @@ POSTHOOK: Output: default@clustergroupby@ds=102 POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: explain -select key, count(1) from clustergroupby where ds='102' group by key limit 10 +select key, count(1) from clustergroupby where ds='102' group by key order by key limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain -select key, count(1) from clustergroupby where ds='102' group by key limit 10 +select key, count(1) from clustergroupby where ds='102' group by key order by key limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1052,7 +1141,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1061,6 +1149,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -1078,12 +1188,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key limit 10 +PREHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key order by key limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@clustergroupby PREHOOK: Input: default@clustergroupby@ds=102 #### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key limit 10 +POSTHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key order by key limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=102 @@ -1099,14 +1209,15 @@ POSTHOOK: Input: default@clustergroupby@ds=102 113 2 114 1 PREHOOK: query: explain -select value, count(1) from clustergroupby where ds='102' group by value limit 10 +select value, count(1) from clustergroupby where ds='102' 
group by value order by value limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain -select value, count(1) from clustergroupby where ds='102' group by value limit 10 +select value, count(1) from clustergroupby where ds='102' group by value order by value limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1130,7 +1241,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1139,6 +1249,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -1156,12 +1288,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select value, count(1) from clustergroupby where ds='102' group by value limit 10 +PREHOOK: query: select value, count(1) from clustergroupby where ds='102' group by value order by value limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@clustergroupby PREHOOK: Input: default@clustergroupby@ds=102 #### A masked pattern was here #### -POSTHOOK: query: select value, count(1) from clustergroupby where ds='102' group by value limit 10 +POSTHOOK: query: select value, count(1) from clustergroupby where ds='102' group by value order by value limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=102 @@ -1297,14 +1429,15 @@ POSTHOOK: Output: default@clustergroupby@ds=103 POSTHOOK: Lineage: clustergroupby PARTITION(ds=103).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: clustergroupby PARTITION(ds=103).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: explain -select key, count(1) from clustergroupby where ds='103' group by key limit 10 +select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain -select key, count(1) from clustergroupby where ds='103' group by key limit 10 +select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1329,7 +1462,6 @@ 
STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1338,6 +1470,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -1355,12 +1509,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select key, count(1) from clustergroupby where ds='103' group by key limit 10 +PREHOOK: query: select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@clustergroupby PREHOOK: Input: default@clustergroupby@ds=103 #### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='103' group by key limit 10 +POSTHOOK: query: select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=103 @@ -1376,14 +1530,15 @@ POSTHOOK: Input: default@clustergroupby@ds=103 113 2 114 1 PREHOOK: query: explain -select key, count(1) from clustergroupby where ds='103' group by value, key limit 10 +select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain -select key, count(1) from clustergroupby where ds='103' group by value, key limit 10 +select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1407,7 +1562,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1420,16 +1574,38 @@ STAGE PLANS: expressions: _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1437,12 +1613,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select key, count(1) from clustergroupby where ds='103' group by value, key limit 10 +PREHOOK: query: select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@clustergroupby PREHOOK: Input: default@clustergroupby@ds=103 #### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='103' group by value, key limit 10 +POSTHOOK: query: select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=103 diff --git ql/src/test/results/clientpositive/groupby_sort_10.q.out ql/src/test/results/clientpositive/groupby_sort_10.q.out index c682e95..9b8d388 100644 --- ql/src/test/results/clientpositive/groupby_sort_10.q.out +++ ql/src/test/results/clientpositive/groupby_sort_10.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@T1 -POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default diff --git ql/src/test/results/clientpositive/metadata_only_queries.q.out ql/src/test/results/clientpositive/metadata_only_queries.q.out index 9bbc9b9..3921f50 100644 --- ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: create table over10k( +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table over10k( t tinyint, si smallint, i int, @@ -15,7 
+17,9 @@ PREHOOK: query: create table over10k( PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@over10k -POSTHOOK: query: create table over10k( +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table over10k( t tinyint, si smallint, i int, diff --git ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out index 6dea3e0..ecedef0 100644 --- ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out +++ ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: create table over10k( +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table over10k( t tinyint, si smallint, i int, @@ -15,7 +17,9 @@ PREHOOK: query: create table over10k( PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@over10k -POSTHOOK: query: create table over10k( +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table over10k( t tinyint, si smallint, i int, diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_part.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_part.q.out index 44ce24e..9c2e5ed 100644 --- ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_part.q.out +++ ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_part.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT @@ -11,7 +11,7 @@ PREHOOK: Output: database:default PREHOOK: Output: default@partitioned1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_table.q.out index 4003c20..e60066b 100644 --- ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_table.q.out +++ ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_table.q.out @@ -1,9 +1,9 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -11,10 +11,10 @@ PREHOOK: Output: database:default PREHOOK: Output: default@table1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -31,25 +31,12 @@ POSTHOOK: Output: default@table1 POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table1 -PREHOOK: type: QUERY -PREHOOK: Input: default@table1 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table1 -#### A masked pattern was here #### -a b -1 original -2 original -3 original -4 original -PREHOOK: query: -- ADD COLUMNS +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@table1 PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) POSTHOOK: type: ALTERTABLE_ADDCOLS POSTHOOK: Input: default@table1 @@ -67,6 +54,59 @@ POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2 POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] _col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 PREHOOK: 
query: select a,b,c,d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 @@ -84,185 +124,150 @@ a b c d 3 original NULL NULL 4 new 40 forty 4 original NULL NULL -PREHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY PREHOOK: Input: default@table1 -PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 -POSTHOOK: Output: default@table1 -PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 PREHOOK: type: QUERY -PREHOOK: Input: default@values__tmp__table__3 -PREHOOK: Output: default@table1 -POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 POSTHOOK: type: QUERY -POSTHOOK: Input: default@values__tmp__table__3 -POSTHOOK: Output: default@table1 -POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] -POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] -_col0 _col1 _col2 _col3 _col4 -PREHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 #### A masked pattern was here #### -POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: query: select d from table1 POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 #### A masked pattern was here #### -a b c d e -1 new 10 ten NULL -1 original NULL NULL NULL -2 new 20 twenty NULL -2 original NULL NULL NULL -3 new 30 thirty NULL -3 original NULL NULL NULL -4 new 40 forty NULL -4 original NULL NULL NULL -5 new 100 hundred another1 -6 new 200 two hundred another2 +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred PREHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE 
CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@table3 +PREHOOK: Output: default@table2 POSTHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__4 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__4 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -3 original -4 original -6737 original -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__5 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__5 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -6737 original -72909 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
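The CHANGE COLUMN variation above widens column a from SMALLINT to INT before inserting 32768, 40000, and 72909, all outside the 2-byte range. A compact HiveQL sketch of the same sequence, assuming a scratch database:

-- Sketch only: widen a SMALLINT column, then insert values beyond the old range.
CREATE TABLE table2(a SMALLINT, b STRING) STORED AS TEXTFILE;
INSERT INTO TABLE table2 VALUES (1000, 'original'), (6737, 'original');
ALTER TABLE table2 CHANGE COLUMN a a INT;
INSERT INTO TABLE table2 VALUES (32768, 'new'), (40000, 'new'), (72909, 'new');
SELECT a, b FROM table2;   -- the out-of-smallint values read back intact under INT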
RESTRICT -alter table table3 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__6 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__6 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -_col0 _col1 _col2 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -5000 new -6737 original -72909 new -90000 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int -PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 change column a a int -POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: select a,b from table3 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 PREHOOK: type: QUERY -PREHOOK: Input: default@table3 +PREHOOK: Input: default@table2 #### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 +POSTHOOK: query: select a,b from table2 POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 +POSTHOOK: Input: default@table2 #### A masked pattern was here #### a b 1000 original @@ -285,13 +290,9 @@ POSTHOOK: Input: default@table1 POSTHOOK: Output: default@table1 PREHOOK: query: DROP TABLE table2 PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 POSTHOOK: query: DROP TABLE table2 POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE table3 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: DROP TABLE table3 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_part.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_part.q.out index 44f5822..9c2e5ed 100644 --- ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_part.q.out +++ ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_part.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT @@ -11,7 +11,7 @@ PREHOOK: Output: database:default PREHOOK: Output: default@partitioned1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_table.q.out index 4003c20..0a2eabb 100644 --- ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_table.q.out +++ ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_table.q.out @@ -1,9 +1,9 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -11,10 +11,10 @@ PREHOOK: Output: database:default PREHOOK: Output: default@table1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -31,25 +31,12 @@ POSTHOOK: Output: default@table1 POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table1 -PREHOOK: type: QUERY -PREHOOK: Input: default@table1 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table1 -#### A masked pattern was here #### -a b -1 original -2 original -3 original -4 original -PREHOOK: query: -- ADD COLUMNS +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@table1 PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) POSTHOOK: type: ALTERTABLE_ADDCOLS POSTHOOK: Input: default@table1 @@ -67,6 +54,59 @@ POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2 POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] _col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 
original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 PREHOOK: query: select a,b,c,d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 @@ -84,185 +124,150 @@ a b c d 3 original NULL NULL 4 new 40 forty 4 original NULL NULL -PREHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY PREHOOK: Input: default@table1 -PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 -POSTHOOK: Output: default@table1 -PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 PREHOOK: type: QUERY -PREHOOK: Input: default@values__tmp__table__3 -PREHOOK: Output: default@table1 -POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 POSTHOOK: type: QUERY -POSTHOOK: Input: default@values__tmp__table__3 -POSTHOOK: Output: default@table1 -POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] -POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] -_col0 _col1 _col2 _col3 _col4 -PREHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 #### A masked pattern was here #### -POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: query: select d from table1 POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 #### A masked pattern was 
here #### -a b c d e -1 new 10 ten NULL -1 original NULL NULL NULL -2 new 20 twenty NULL -2 original NULL NULL NULL -3 new 30 thirty NULL -3 original NULL NULL NULL -4 new 40 forty NULL -4 original NULL NULL NULL -5 new 100 hundred another1 -6 new 200 two hundred another2 +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred PREHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@table3 +PREHOOK: Output: default@table2 POSTHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__4 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__4 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -3 original -4 original -6737 original -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__5 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__5 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -6737 original -72909 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__6 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__6 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -_col0 _col1 _col2 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -5000 new -6737 original -72909 new -90000 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int -PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 change column a a int -POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: select a,b from table3 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 PREHOOK: type: QUERY -PREHOOK: Input: default@table3 +PREHOOK: Input: default@table2 #### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 +POSTHOOK: query: select a,b from table2 POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 +POSTHOOK: Input: default@table2 #### A masked pattern was here #### a b 1000 original @@ -285,13 +290,9 @@ POSTHOOK: Input: default@table1 POSTHOOK: Output: default@table1 PREHOOK: query: DROP TABLE table2 PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 POSTHOOK: query: DROP TABLE table2 POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE table3 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: DROP TABLE table3 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_part.q.out ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_part.q.out new file mode 100644 index 0000000..39eb3ed --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_part.q.out @@ -0,0 +1,687 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: partitioned1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL 
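The new schema_evol_text_vec_mapwork_part.q.out records vectorized plans ("Execution mode: vectorized") over TEXTFILE data. The .q.out does not echo the session settings, but its header comment points at hive.vectorized.use.vector.serde.deserialize; a plausible preamble for the corresponding .q test, given here as an assumption rather than a copy of the actual test file, would be:

-- Assumed session settings (not echoed in the golden output):
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.use.vector.serde.deserialize=true;
-- The map-side plan should then report "Execution mode: vectorized":
EXPLAIN SELECT part, a, b FROM partitioned1;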
+1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
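The partitioned2 result above includes the row "1 NULL new": the Table-Non-Cascade CHANGE COLUMN only widened the table-level schema, so the pre-existing part=1 partition still carries the original SMALLINT type, and the 90000 value written into it reads back as NULL, while 5000 (inside the SMALLINT range) and everything written to the newly created part=2 survive. One way to confirm the per-partition schema, sketched with standard HiveQL:

-- Sketch: compare the column type recorded for each partition.
DESCRIBE partitioned2 PARTITION (part=1);   -- expected to still report a as smallint
DESCRIBE partitioned2 PARTITION (part=2);   -- created after the ALTER, expected to report a as int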
+alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c 
+1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
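The partition(part) inserts in this file (partitioned3 above, partitioned4 below) are fully dynamic: the partition value comes from the last VALUES column. The golden output does not show the session settings, but inserts with no static partition key normally require dynamic partitioning in nonstrict mode; a hedged sketch of the preamble and the insert shape:

-- Assumed settings for fully dynamic partition inserts (not echoed in the golden output):
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT INTO TABLE partitioned3 PARTITION(part)
VALUES (1, 'new', 10, 'ten', 2), (5, 'new', 100, 'hundred', 1);   -- last column feeds part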
DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_table.q.out new file mode 100644 index 0000000..b5b34e0 --- /dev/null +++ 
ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_table.q.out @@ -0,0 +1,343 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: table1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 
+#### A masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: 
default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_part.q.out ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_part.q.out new file mode 100644 index 0000000..930b772 --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_part.q.out @@ -0,0 +1,859 @@ +PREHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: partitioned1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from 
partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- string to double +-- +CREATE TABLE partitioned5(a string, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- string to double +-- +CREATE TABLE partitioned5(a string, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: insert into table partitioned5 partition(part=1) values('1000', 'original'),('6737', 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__11 +PREHOOK: Output: default@partitioned5@part=1 +POSTHOOK: query: insert into table partitioned5 partition(part=1) values('1000', 'original'),('6737', 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__11 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +values__tmp__table__11.tmp_values_col1 values__tmp__table__11.tmp_values_col2 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned5 change column a a double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned5 +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned5 change column a a double +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: insert into table partitioned5 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__12 +PREHOOK: Output: default@partitioned5@part=2 +POSTHOOK: query: insert into table partitioned5 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__12 +POSTHOOK: Output: default@partitioned5@part=2 +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).a EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).b SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned5 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__13 +PREHOOK: Output: default@partitioned5@part=1 +POSTHOOK: query: insert into table partitioned5 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__13 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned5 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned5 +PREHOOK: Input: default@partitioned5@part=1 +PREHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from 
partitioned5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Input: default@partitioned5@part=1 +POSTHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +part a b +1 1000.0 original +1 3.0 original +1 4.0 original +1 5000.0 new +1 6737.0 original +1 90000.0 new +2 200.0 new +2 32768.0 new +2 40000.0 new +2 72909.0 new +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- string to double +-- +CREATE TABLE partitioned6(a STRING, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- string to double +-- +CREATE TABLE partitioned6(a STRING, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: insert into table partitioned6 partition(part=1) values('1000', 'original'),('6737', 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__14 +PREHOOK: Output: default@partitioned6@part=1 +POSTHOOK: query: insert into table partitioned6 partition(part=1) values('1000', 'original'),('6737', 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__14 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +values__tmp__table__14.tmp_values_col1 values__tmp__table__14.tmp_values_col2 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned6 change column a a double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned6 +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned6 change column a a double +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: insert into table partitioned6 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__15 +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: insert into table partitioned6 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__15 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Output: default@partitioned6@part=2 +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned6 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned6 +PREHOOK: Input: default@partitioned6@part=1 +PREHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Input: default@partitioned6@part=1 +POSTHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +part a b +1 1000.0 original +1 3.0 original +1 4.0 original +1 5000.0 new +1 6737.0 original +1 90000.0 new +2 200.0 new +2 32768.0 new +2 40000.0 new +2 72909.0 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: DROP TABLE partitioned5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned5 +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: DROP TABLE partitioned5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: 
default@partitioned5 +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: DROP TABLE partitioned6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned6 +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: DROP TABLE partitioned6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Output: default@partitioned6 diff --git ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_table.q.out new file mode 100644 index 0000000..953c2b5 --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_table.q.out @@ -0,0 +1,427 @@ +PREHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: table1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 
+#### A masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- string to int +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table3 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- string to int +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values('1000', 'original'),('6737', 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values('1000', 'original'),('6737', 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table3 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table3 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table3 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE 
table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: DROP TABLE table3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: DROP TABLE table3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_part.q.out index 44ce24e..9c2e5ed 100644 --- ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_part.q.out +++ ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_part.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT @@ -11,7 +11,7 @@ PREHOOK: Output: database:default PREHOOK: Output: default@partitioned1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_table.q.out index 4003c20..e60066b 100644 --- ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_table.q.out +++ ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_table.q.out @@ -1,9 +1,9 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -11,10 +11,10 @@ PREHOOK: Output: database:default PREHOOK: Output: default@table1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -31,25 +31,12 @@ POSTHOOK: Output: default@table1 POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table1 -PREHOOK: type: QUERY -PREHOOK: Input: default@table1 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table1 -#### A masked pattern was here #### -a b -1 original -2 original -3 original -4 original -PREHOOK: query: -- ADD COLUMNS +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@table1 PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
alter table table1 add columns(c int, d string) POSTHOOK: type: ALTERTABLE_ADDCOLS POSTHOOK: Input: default@table1 @@ -67,6 +54,59 @@ POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2 POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] _col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 PREHOOK: query: select a,b,c,d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 @@ -84,185 +124,150 @@ a b c d 3 original NULL NULL 4 new 40 forty 4 original NULL NULL -PREHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY PREHOOK: Input: default@table1 -PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 -POSTHOOK: Output: default@table1 -PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 PREHOOK: 
type: QUERY -PREHOOK: Input: default@values__tmp__table__3 -PREHOOK: Output: default@table1 -POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 POSTHOOK: type: QUERY -POSTHOOK: Input: default@values__tmp__table__3 -POSTHOOK: Output: default@table1 -POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] -POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] -_col0 _col1 _col2 _col3 _col4 -PREHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 #### A masked pattern was here #### -POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: query: select d from table1 POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 #### A masked pattern was here #### -a b c d e -1 new 10 ten NULL -1 original NULL NULL NULL -2 new 20 twenty NULL -2 original NULL NULL NULL -3 new 30 thirty NULL -3 original NULL NULL NULL -4 new 40 forty NULL -4 original NULL NULL NULL -5 new 100 hundred another1 -6 new 200 two hundred another2 +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred PREHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@table3 +PREHOOK: Output: default@table2 POSTHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__4 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__4 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -3 original -4 original -6737 original -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__5 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__5 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -6737 original -72909 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__6 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__6 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -_col0 _col1 _col2 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -5000 new -6737 original -72909 new -90000 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int -PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 change column a a int -POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: select a,b from table3 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 PREHOOK: type: QUERY -PREHOOK: Input: default@table3 +PREHOOK: Input: default@table2 #### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 +POSTHOOK: query: select a,b from table2 POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 +POSTHOOK: Input: default@table2 #### A masked pattern was here #### a b 1000 original @@ -285,13 +290,9 @@ POSTHOOK: Input: default@table1 POSTHOOK: Output: default@table1 PREHOOK: query: DROP TABLE table2 PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 POSTHOOK: query: DROP TABLE table2 POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE table3 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: DROP TABLE table3 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_part.q.out index 44f5822..9c2e5ed 100644 --- ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_part.q.out +++ ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_part.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT @@ -11,7 +11,7 @@ PREHOOK: Output: database:default PREHOOK: Output: default@partitioned1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_table.q.out index 4003c20..0a2eabb 100644 --- ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_table.q.out +++ ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_table.q.out @@ -1,9 +1,9 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -11,10 +11,10 @@ PREHOOK: Output: database:default PREHOOK: Output: default@table1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -31,25 +31,12 @@ POSTHOOK: Output: default@table1 POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table1 -PREHOOK: type: QUERY -PREHOOK: Input: default@table1 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table1 -#### A masked pattern was here #### -a b -1 original -2 original -3 original -4 original -PREHOOK: query: -- ADD COLUMNS +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@table1 PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) POSTHOOK: type: ALTERTABLE_ADDCOLS POSTHOOK: Input: default@table1 @@ -67,6 +54,59 @@ POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2 POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] _col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 
original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 PREHOOK: query: select a,b,c,d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 @@ -84,185 +124,150 @@ a b c d 3 original NULL NULL 4 new 40 forty 4 original NULL NULL -PREHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY PREHOOK: Input: default@table1 -PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 -POSTHOOK: Output: default@table1 -PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 PREHOOK: type: QUERY -PREHOOK: Input: default@values__tmp__table__3 -PREHOOK: Output: default@table1 -POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 POSTHOOK: type: QUERY -POSTHOOK: Input: default@values__tmp__table__3 -POSTHOOK: Output: default@table1 -POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] -POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] -_col0 _col1 _col2 _col3 _col4 -PREHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 #### A masked pattern was here #### -POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: query: select d from table1 POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 #### A masked pattern was 
here #### -a b c d e -1 new 10 ten NULL -1 original NULL NULL NULL -2 new 20 twenty NULL -2 original NULL NULL NULL -3 new 30 thirty NULL -3 original NULL NULL NULL -4 new 40 forty NULL -4 original NULL NULL NULL -5 new 100 hundred another1 -6 new 200 two hundred another2 +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred PREHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@table3 +PREHOOK: Output: default@table2 POSTHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__4 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__4 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -3 original -4 original -6737 original -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__5 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__5 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -6737 original -72909 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__6 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__6 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -_col0 _col1 _col2 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -5000 new -6737 original -72909 new -90000 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int -PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 change column a a int -POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: select a,b from table3 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 PREHOOK: type: QUERY -PREHOOK: Input: default@table3 +PREHOOK: Input: default@table2 #### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 +POSTHOOK: query: select a,b from table2 POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 +POSTHOOK: Input: default@table2 #### A masked pattern was here #### a b 1000 original @@ -285,13 +290,9 @@ POSTHOOK: Input: default@table1 POSTHOOK: Output: default@table1 PREHOOK: query: DROP TABLE table2 PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 POSTHOOK: query: DROP TABLE table2 POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE table3 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: DROP TABLE table3 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_part.q.out new file mode 100644 index 0000000..e17b1b0 --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_part.q.out @@ -0,0 +1,670 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +Explain +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized + File Output Operator [FS_4] + Select Operator [OP_3] (rows=10 width=13) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=10 width=13) + default@partitioned1,partitioned1,Tbl:COMPLETE,Col:NONE,Output:["a","b"] + +PREHOOK: query: select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a 
c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table.q.out new file mode 100644 index 0000000..ffce206 --- /dev/null +++ 
ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table.q.out @@ -0,0 +1,326 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +POSTHOOK: type: QUERY +Explain +Plan optimized by CBO. 
+ +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized + File Output Operator [FS_4] + Select Operator [OP_3] (rows=10 width=13) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=10 width=13) + default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["a","b"] + +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: 
default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table_bug.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table_bug.q.out new file mode 100644 index 0000000..50113c8 --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table_bug.q.out @@ -0,0 +1,96 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_part.q.out new file mode 100644 index 0000000..d0337ee --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_part.q.out @@ -0,0 +1,842 @@ +PREHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +Explain +Plan optimized by CBO. 
+ +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized + File Output Operator [FS_4] + Select Operator [OP_3] (rows=10 width=13) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=10 width=13) + default@partitioned1,partitioned1,Tbl:COMPLETE,Col:NONE,Output:["a","b"] + +PREHOOK: query: select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: 
QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- string to double +-- +CREATE TABLE partitioned5(a string, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- string to double +-- +CREATE TABLE partitioned5(a string, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: insert into table partitioned5 partition(part=1) values('1000', 'original'),('6737', 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__11 +PREHOOK: Output: default@partitioned5@part=1 +POSTHOOK: query: insert into table partitioned5 partition(part=1) values('1000', 'original'),('6737', 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__11 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +values__tmp__table__11.tmp_values_col1 values__tmp__table__11.tmp_values_col2 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned5 change column a a double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned5 +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned5 change column a a double +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: insert into table partitioned5 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__12 +PREHOOK: Output: default@partitioned5@part=2 +POSTHOOK: query: insert into table partitioned5 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__12 +POSTHOOK: Output: default@partitioned5@part=2 +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).a EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).b SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned5 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__13 +PREHOOK: Output: default@partitioned5@part=1 +POSTHOOK: query: insert into table partitioned5 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__13 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned5 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned5 +PREHOOK: Input: default@partitioned5@part=1 +PREHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from 
partitioned5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Input: default@partitioned5@part=1 +POSTHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +part a b +1 1000.0 original +1 3.0 original +1 4.0 original +1 5000.0 new +1 6737.0 original +1 90000.0 new +2 200.0 new +2 32768.0 new +2 40000.0 new +2 72909.0 new +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- string to double +-- +CREATE TABLE partitioned6(a STRING, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- string to double +-- +CREATE TABLE partitioned6(a STRING, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: insert into table partitioned6 partition(part=1) values('1000', 'original'),('6737', 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__14 +PREHOOK: Output: default@partitioned6@part=1 +POSTHOOK: query: insert into table partitioned6 partition(part=1) values('1000', 'original'),('6737', 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__14 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +values__tmp__table__14.tmp_values_col1 values__tmp__table__14.tmp_values_col2 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned6 change column a a double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned6 +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned6 change column a a double +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: insert into table partitioned6 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__15 +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: insert into table partitioned6 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__15 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Output: default@partitioned6@part=2 +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned6 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned6 +PREHOOK: Input: default@partitioned6@part=1 +PREHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Input: default@partitioned6@part=1 +POSTHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +part a b +1 1000.0 original +1 3.0 original +1 4.0 original +1 5000.0 new +1 6737.0 original +1 90000.0 new +2 200.0 new +2 32768.0 new +2 40000.0 new +2 72909.0 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: DROP TABLE partitioned5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned5 +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: DROP TABLE partitioned5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: 
default@partitioned5 +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: DROP TABLE partitioned6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned6 +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: DROP TABLE partitioned6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Output: default@partitioned6 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_table.q.out new file mode 100644 index 0000000..9e02a61 --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_table.q.out @@ -0,0 +1,410 @@ +PREHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +POSTHOOK: type: QUERY +Explain +Plan optimized by CBO. 
+ +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized + File Output Operator [FS_4] + Select Operator [OP_3] (rows=10 width=13) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=10 width=13) + default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["a","b"] + +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- string to int +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table3 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- string to int +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values('1000', 'original'),('6737', 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values('1000', 'original'),('6737', 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table3 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table3 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table3 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE 
table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: DROP TABLE table3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: DROP TABLE table3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java index c6ff748..5ca5416 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java @@ -88,6 +88,18 @@ private InputByteBuffer inputByteBuffer = new InputByteBuffer(); + private boolean[] columnsToInclude; + + // Temporary objects to use when skipping for columnsToInclude. + private ReadDateResults dummyReadDateResults; + private ReadTimestampResults dummyReadTimestampResults; + private ReadStringResults dummyReadStringResults; + private ReadHiveCharResults dummyReadHiveCharResults; + private ReadHiveVarcharResults dummyReadHiveVarcharResults; + private ReadBinaryResults dummyReadBinaryResults; + private ReadIntervalYearMonthResults dummyReadIntervalYearMonthResults; + private ReadIntervalDayTimeResults dummyReadIntervalDayTimeResults; + /* * Use this constructor when only ascending sort order is used. */ @@ -109,6 +121,8 @@ public BinarySortableDeserializeRead(TypeInfo[] typeInfos, readBeyondConfiguredFieldsWarned = false; readBeyondBufferRangeWarned = false; bufferRangeHasExtraDataWarned = false; + + columnsToInclude = null; } // Not public since we must have column information. @@ -123,6 +137,16 @@ private BinarySortableDeserializeRead() { } /* + * If some fields are are not going to be used by the query, use this routine to specify + * the columns to return. The readCheckNull method will automatically return NULL for the + * other columns. + */ + @Override + public void setColumnsToInclude(boolean[] columnsToInclude) { + this.columnsToInclude = columnsToInclude; + } + + /* * Set the range of bytes to be deserialized. */ @Override @@ -177,13 +201,99 @@ public boolean readCheckNull() throws IOException { // We have a field and are positioned to it. - if (((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { - return false; - } + // Do we want this field? + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory(); + if (columnsToInclude != null && !columnsToInclude[fieldIndex]) { + + // We must read through the value to ignore it... 
+ switch (primitiveCategory) { + case BOOLEAN: + readBoolean(); + break; + case BYTE: + readByte(); + break; + case SHORT: + readShort(); + break; + case INT: + readInt(); + break; + case LONG: + readLong(); + break; + case DATE: + if (dummyReadDateResults == null) { + dummyReadDateResults = createReadDateResults(); + } + readDate(dummyReadDateResults); + break; + case TIMESTAMP: + if (dummyReadTimestampResults == null) { + dummyReadTimestampResults = createReadTimestampResults(); + } + readTimestamp(dummyReadTimestampResults); + break; + case FLOAT: + readFloat(); + break; + case DOUBLE: + readDouble(); + break; + case STRING: + if (dummyReadStringResults == null) { + dummyReadStringResults = createReadStringResults(); + } + readString(dummyReadStringResults); + break; + case CHAR: + if (dummyReadHiveCharResults == null) { + dummyReadHiveCharResults = createReadHiveCharResults(); + } + readHiveChar(dummyReadHiveCharResults); + break; + case VARCHAR: + if (dummyReadHiveVarcharResults == null) { + dummyReadHiveVarcharResults = createReadHiveVarcharResults(); + } + readHiveVarchar(dummyReadHiveVarcharResults); + break; + case BINARY: + if (dummyReadBinaryResults == null) { + dummyReadBinaryResults = createReadBinaryResults(); + } + readBinary(dummyReadBinaryResults); + break; + case INTERVAL_YEAR_MONTH: + if (dummyReadIntervalYearMonthResults == null) { + dummyReadIntervalYearMonthResults = createReadIntervalYearMonthResults(); + } + readIntervalYearMonth(dummyReadIntervalYearMonthResults); + break; + case INTERVAL_DAY_TIME: + if (dummyReadIntervalDayTimeResults == null) { + dummyReadIntervalDayTimeResults = createReadIntervalDayTimeResults(); + } + readIntervalDayTime(dummyReadIntervalDayTimeResults); + break; + case DECIMAL: + // UNDONE: broken + earlyReadHiveDecimal(); + break; + default: + throw new RuntimeException("Unexpected primitive type category " + primitiveCategory); + } + return true; + } else { + // UNDONE + if (primitiveCategory != PrimitiveCategory.DECIMAL) { + return false; + } - // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read - // it here. - return earlyReadHiveDecimal(); + // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read + // it here. + return earlyReadHiveDecimal(); + } } /* diff --git serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java index c2b0cfc..dbadf4e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java @@ -60,6 +60,13 @@ TypeInfo[] typeInfos(); /* + * If some fields are are not going to be used by the query, use this routine to specify + * the columns to return. The readCheckNull method will automatically return NULL for the + * other columns. + */ + void setColumnsToInclude(boolean[] columnsToInclude); + + /* * Set the range of bytes to be deserialized. 
*/ void set(byte[] bytes, int offset, int length); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java index f44a84b..f0312ff 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java @@ -23,6 +23,7 @@ import java.nio.charset.CharacterCodingException; import java.sql.Date; import java.sql.Timestamp; +import java.util.Arrays; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -71,12 +72,14 @@ private TypeInfo[] typeInfos; + private int[] startPosition; private byte separator; private boolean isEscaped; private byte escapeChar; private byte[] nullSequenceBytes; private boolean isExtendedBooleanLiteral; + private boolean lastColumnTakesRest; private byte[] bytes; private int start; @@ -107,27 +110,38 @@ private Text tempText; private TimestampParser timestampParser; - private boolean readBeyondConfiguredFieldsWarned; - private boolean readBeyondBufferRangeWarned; - private boolean bufferRangeHasExtraDataWarned; + private boolean extraFieldWarned; + private boolean missingFieldWarned; + + private boolean[] columnsToInclude; public LazySimpleDeserializeRead(TypeInfo[] typeInfos, byte separator, LazySerDeParameters lazyParams) { + this(); this.typeInfos = typeInfos; + // Field length is difference between positions hence one extra. + startPosition = new int[typeInfos.length + 1]; + this.separator = separator; isEscaped = lazyParams.isEscaped(); escapeChar = lazyParams.getEscapeChar(); nullSequenceBytes = lazyParams.getNullSequence().getBytes(); isExtendedBooleanLiteral = lazyParams.isExtendedBooleanLiteral(); + lastColumnTakesRest = lazyParams.isLastColumnTakesRest(); fieldCount = typeInfos.length; tempText = new Text(); - readBeyondConfiguredFieldsWarned = false; - readBeyondBufferRangeWarned = false; - bufferRangeHasExtraDataWarned = false; + extraFieldWarned = false; + missingFieldWarned = false; + + columnsToInclude = null; + } + + public LazySimpleDeserializeRead(TypeInfo[] typeInfos, LazySerDeParameters lazyParams) { + this(typeInfos, lazyParams.getSeparators()[0], lazyParams); } // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. @@ -143,6 +157,16 @@ private LazySimpleDeserializeRead() { } /* + * If some fields are are not going to be used by the query, use this routine to specify + * the columns to return. The readCheckNull method will automatically return NULL for the + * other columns. + */ + @Override + public void setColumnsToInclude(boolean[] columnsToInclude) { + this.columnsToInclude = columnsToInclude; + } + + /* * Set the range of bytes to be deserialized. */ @Override @@ -154,6 +178,68 @@ public void set(byte[] bytes, int offset, int length) { fieldIndex = -1; } + /** + * Parse the byte[] and fill each field. + * + * This is an adapted version of the parse method in the LazyStruct class. + * They should parse things the same way. + */ + private void parse() { + + int structByteEnd = end; + int fieldId = 0; + int fieldByteBegin = start; + int fieldByteEnd = start; + + // Go through all bytes in the byte[] + while (fieldByteEnd <= structByteEnd) { + if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) { + // Reached the end of a field? 
+ if (lastColumnTakesRest && fieldId == fieldCount - 1) { + fieldByteEnd = structByteEnd; + } + startPosition[fieldId] = fieldByteBegin; + fieldId++; + if (fieldId == fieldCount || fieldByteEnd == structByteEnd) { + // All fields have been parsed, or bytes have been parsed. + // We need to set the startPosition of fields.length to ensure we + // can use the same formula to calculate the length of each field. + // For missing fields, their starting positions will all be the same, + // which will make their lengths to be -1 and uncheckedGetField will + // return these fields as NULLs. + for (int i = fieldId; i <= fieldCount; i++) { + startPosition[i] = fieldByteEnd + 1; + } + break; + } + fieldByteBegin = fieldByteEnd + 1; + fieldByteEnd++; + } else { + if (isEscaped && bytes[fieldByteEnd] == escapeChar + && fieldByteEnd + 1 < structByteEnd) { + // ignore the char after escape_char + fieldByteEnd += 2; + } else { + fieldByteEnd++; + } + } + } + + // Extra bytes at the end? + if (!extraFieldWarned && fieldByteEnd < structByteEnd) { + extraFieldWarned = true; + LOG.warn("Extra bytes detected at the end of the row! Ignoring similar " + + "problems."); + } + + // Missing fields? + if (!missingFieldWarned && fieldId < fieldCount) { + missingFieldWarned = true; + LOG.info("Missing fields! Expected " + fieldCount + " fields but " + + "only got " + fieldId + "! Ignoring similar problems."); + } + } + /* * Reads the NULL information for a field. * @@ -162,57 +248,24 @@ public void set(byte[] bytes, int offset, int length) { */ @Override public boolean readCheckNull() { - if (++fieldIndex >= fieldCount) { - // Reading beyond the specified field count produces NULL. - if (!readBeyondConfiguredFieldsWarned) { - // Warn only once. - LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but " - + " reading more (NULLs returned). Ignoring similar problems."); - readBeyondConfiguredFieldsWarned = true; - } + if (fieldIndex == -1) { + parse(); + fieldIndex = 0; + } else if (fieldIndex + 1 >= fieldCount) { return true; + } else { + fieldIndex++; } - if (offset > end) { - // We must allow for an empty field at the end, so no strict >= checking. - if (!readBeyondBufferRangeWarned) { - // Warn only once. - int length = end - start; - LOG.info("Reading beyond buffer range! Buffer range " + start - + " for length " + length + " but reading more (NULLs returned)." - + " Ignoring similar problems."); - readBeyondBufferRangeWarned = true; - } - - // char[] charsBuffer = new char[end - start]; - // for (int c = 0; c < charsBuffer.length; c++) { - // charsBuffer[c] = (char) (bytes[start + c] & 0xFF); - // } + // Do we want this field? + if (columnsToInclude != null && !columnsToInclude[fieldIndex]) { return true; } - fieldStart = offset; - while (true) { - if (offset >= end) { - fieldLength = offset - fieldStart; - break; - } - if (bytes[offset] == separator) { - fieldLength = (offset++ - fieldStart); - break; - } - if (isEscaped && bytes[offset] == escapeChar - && offset + 1 < end) { - // Ignore the char after escape char. - offset += 2; - } else { - offset++; - } - } - - char[] charField = new char[fieldLength]; - for (int c = 0; c < charField.length; c++) { - charField[c] = (char) (bytes[fieldStart + c] & 0xFF); + fieldStart = startPosition[fieldIndex]; + fieldLength = startPosition[fieldIndex + 1] - startPosition[fieldIndex] - 1; + if (fieldLength < 0) { + return true; } // Is the field the configured string representing NULL? 
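
/*
 * A minimal standalone sketch of the startPosition bookkeeping used by the parse()
 * method above: the array has fieldCount + 1 slots, so the length of field i is simply
 * startPosition[i + 1] - startPosition[i] - 1 (dropping the separator byte), and any
 * missing trailing fields share a start offset and come out with length -1, which
 * readCheckNull() reports as NULL. The FieldSplitter class below is hypothetical and
 * simplified: single-byte separator, no escape handling, no null-sequence check, and no
 * lastColumnTakesRest support.
 */
public class FieldSplitter {

  // Returns fieldCount + 1 start offsets over bytes[start, end).
  public static int[] parseStartPositions(byte[] bytes, int start, int end,
      byte separator, int fieldCount) {
    int[] startPosition = new int[fieldCount + 1];
    int fieldId = 0;
    int fieldByteBegin = start;
    int fieldByteEnd = start;
    while (fieldByteEnd <= end) {
      if (fieldByteEnd == end || bytes[fieldByteEnd] == separator) {
        startPosition[fieldId] = fieldByteBegin;
        fieldId++;
        if (fieldId == fieldCount || fieldByteEnd == end) {
          // Remaining slots share the same start, so their computed length is -1 (NULL).
          for (int i = fieldId; i <= fieldCount; i++) {
            startPosition[i] = fieldByteEnd + 1;
          }
          break;
        }
        fieldByteBegin = fieldByteEnd + 1;
      }
      fieldByteEnd++;
    }
    return startPosition;
  }

  public static void main(String[] args) {
    byte[] row = "1,new,10".getBytes();  // 4 declared fields, only 3 present in the data
    int[] pos = parseStartPositions(row, 0, row.length, (byte) ',', 4);
    for (int i = 0; i < 4; i++) {
      int len = pos[i + 1] - pos[i] - 1;
      System.out.println("field " + i + ": "
          + (len < 0 ? "NULL" : new String(row, pos[i], len)));
    }
  }
}
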
@@ -520,17 +573,7 @@ public void logExceptionMessage(byte[] bytes, int bytesStart, int bytesLength, S */ @Override public void extraFieldsCheck() { - if (offset < end) { - // We did not consume all of the byte range. - if (!bufferRangeHasExtraDataWarned) { - // Warn only once. - int length = end - start; - LOG.info("Not all fields were read in the buffer range! Buffer range " + start - + " for length " + length + " but reading more (NULLs returned)." - + " Ignoring similar problems."); - bufferRangeHasExtraDataWarned = true; - } - } + // UNDONE: Get rid of... } /* @@ -538,15 +581,15 @@ public void extraFieldsCheck() { */ @Override public boolean readBeyondConfiguredFieldsWarned() { - return readBeyondConfiguredFieldsWarned; + return missingFieldWarned; } @Override public boolean readBeyondBufferRangeWarned() { - return readBeyondBufferRangeWarned; + return extraFieldWarned; } @Override public boolean bufferRangeHasExtraDataWarned() { - return bufferRangeHasExtraDataWarned; + return false; // UNDONE: Get rid of... } /* diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java index c5f0730..91df68d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java @@ -82,6 +82,8 @@ private boolean readBeyondBufferRangeWarned; private boolean bufferRangeHasExtraDataWarned; + private boolean[] columnsToInclude; + public LazyBinaryDeserializeRead(TypeInfo[] typeInfos) { this.typeInfos = typeInfos; fieldCount = typeInfos.length; @@ -90,6 +92,8 @@ public LazyBinaryDeserializeRead(TypeInfo[] typeInfos) { readBeyondConfiguredFieldsWarned = false; readBeyondBufferRangeWarned = false; bufferRangeHasExtraDataWarned = false; + + columnsToInclude = null; } // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. @@ -104,6 +108,16 @@ private LazyBinaryDeserializeRead() { } /* + * If some fields are are not going to be used by the query, use this routine to specify + * the columns to return. The readCheckNull method will automatically return NULL for the + * other columns. + */ + @Override + public void setColumnsToInclude(boolean[] columnsToInclude) { + this.columnsToInclude = columnsToInclude; + } + + /* * Set the range of bytes to be deserialized. */ @Override @@ -155,13 +169,35 @@ public boolean readCheckNull() throws IOException { // We have a field and are positioned to it. - if (((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { - return false; - } + // Do we want this field? + if (columnsToInclude != null && !columnsToInclude[fieldIndex]) { + + // When NULL, we need to move past this field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } - // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read - // it here. 
- return earlyReadHiveDecimal(); + return true; + } else { + PrimitiveCategory primitiveCategory = + ((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory(); + if (primitiveCategory != PrimitiveCategory.DECIMAL) { + return false; + } + + // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read + // it here. + return earlyReadHiveDecimal(); + } } // When NULL, we need to move past this field.
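
/*
 * A minimal sketch of how a caller might drive the setColumnsToInclude() contract that
 * this patch adds to the DeserializeRead implementations: excluded columns are reported
 * as NULL by readCheckNull() without their values being materialized. The
 * ColumnPruningReaderSketch class and its method names are hypothetical; the actual
 * vectorized row-deserialize consumer is not shown in this excerpt. Only methods visible
 * in this patch's DeserializeRead interface (typeInfos, setColumnsToInclude, set,
 * readCheckNull) are used.
 */
import java.io.IOException;

import org.apache.hadoop.hive.serde2.fast.DeserializeRead;

public class ColumnPruningReaderSketch {

  // Mark only the projected columns as included; readCheckNull() then returns true for
  // every excluded column as if it were NULL.
  public static void configureIncludedColumns(DeserializeRead deserializeRead,
      int[] projectedColumns) {
    boolean[] columnsToInclude = new boolean[deserializeRead.typeInfos().length];
    for (int c : projectedColumns) {
      columnsToInclude[c] = true;
    }
    deserializeRead.setColumnsToInclude(columnsToInclude);
  }

  // Read one serialized row: excluded or NULL fields return true from readCheckNull();
  // for included non-NULL fields the caller would dispatch to the type-specific read*
  // methods (omitted here).
  public static void readRow(DeserializeRead deserializeRead, byte[] rowBytes)
      throws IOException {
    deserializeRead.set(rowBytes, 0, rowBytes.length);
    int fieldCount = deserializeRead.typeInfos().length;
    for (int i = 0; i < fieldCount; i++) {
      boolean isNullOrExcluded = deserializeRead.readCheckNull();
      if (!isNullOrExcluded) {
        // ... read the field value according to its PrimitiveCategory ...
      }
    }
  }
}
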