diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 73e6c21..97fc660 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1642,7 +1642,7 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { HIVEOUTERJOINSUPPORTSFILTERS("hive.outerjoin.supports.filters", true, ""), - HIVEFETCHTASKCONVERSION("hive.fetch.task.conversion", "more", new StringSet("none", "minimal", "more"), + HIVEFETCHTASKCONVERSION("hive.fetch.task.conversion", "none", new StringSet("none", "minimal", "more"), "Some select queries can be converted to single FETCH task minimizing latency.\n" + "Currently the query should be single sourced not having any subquery and should not have\n" + "any aggregations or distincts (which incurs RS), lateral views and joins.\n" + @@ -2255,7 +2255,7 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "and use it to run queries."), // Vectorization enabled - HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", false, + HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", true, "This flag should be set to true to enable vectorized mode of query execution.\n" + "The default value is false."), HIVE_VECTORIZATION_REDUCE_ENABLED("hive.vectorized.execution.reduce.enabled", true, @@ -2295,6 +2295,15 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "This flag should be set to true to enable the new vectorization\n" + "of queries using ReduceSink.\ni" + "The default value is true."), + HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT("hive.vectorized.use.vectorized.input.format", false, + "This flag should be set to true to enable vectorizing with vectorized input file format capable SerDe.\n" + + "The default value is true."), + HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE("hive.vectorized.use.vector.serde.deserialize", true, + "This flag should be set to true to enable vectorizing rows using vector deserialize.\n" + + "The default value is true."), + HIVE_VECTORIZATION_USE_ROW_DESERIALIZE("hive.vectorized.use.row.serde.deserialize", true, + "This flag should be set to true to enable vectorizing using row deserialize.\n" + + "The default value is true."), HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " + "whether to check, convert, and normalize partition value to conform to its column type in " + "partition operations including but not limited to insert, such as alter, describe etc."), diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index a3d71c0..b80cfa3 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -97,7 +97,7 @@ sourceInputFormat.getRecordReader(split, job, reporter); return rr; } - boolean isVectorMode = Utilities.isVectorMode(job); + boolean isVectorMode = Utilities.getUseVectorizedInputFileFormat(job); if (!isVectorMode) { LlapIoImpl.LOG.error("No LLAP IO in non-vectorized mode"); throw new UnsupportedOperationException("No LLAP IO in non-vectorized mode"); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapOperator.java new file mode 100644 index 0000000..3247c5d --- 
/dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapOperator.java @@ -0,0 +1,180 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import java.io.Serializable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.MapOperator.MapOpCtx; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Writable; + + +/** + * Map operator. This triggers overall map side processing. This is a little + * different from regular operators in that it starts off by processing a + * Writable data structure from a Table (instead of a Hive Object). + **/ +@SuppressWarnings("deprecation") +public abstract class AbstractMapOperator extends Operator implements Serializable, Cloneable { + + private static final long serialVersionUID = 1L; + + /** + * Initialization call sequence: + * + * (Operator) setConf(MapWork conf); + * (Operator) initialize(Configuration hconf, ObjectInspector[] inputOIs); + * + * (AbstractMapOperator) setChildren(Configuration hconf) + * + * (Operator) passExecContext(ExecMapperContext execContext) + * (Operator) initializeLocalWork(Configuration hconf) + * + * (AbstractMapOperator) initializeMapOperator(Configuration hconf) + * + * [ (AbstractMapOperator) initializeContexts() ] // exec.tez.MapRecordProcessor only. + * + * (Operator) setReporter(Reporter rep) + * + */ + /** + * Counter. 
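+ * Hadoop counters published by this operator: deserialization errors seen while reading rows and the total number of input records.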
+ * + */ + public static enum Counter { + DESERIALIZE_ERRORS, + RECORDS_IN + } + + protected final transient LongWritable deserialize_error_count = new LongWritable(); + protected final transient LongWritable recordCounter = new LongWritable(); + protected transient long numRows = 0; + + private final Map connectedOperators + = new TreeMap(); + + private transient final Map normalizedPaths = new HashMap(); + + private Path normalizePath(String onefile, boolean schemaless) { + //creating Path is expensive, so cache the corresponding + //Path object in normalizedPaths + Path path = normalizedPaths.get(onefile); + if (path == null) { + path = new Path(onefile); + if (schemaless && path.toUri().getScheme() != null) { + path = new Path(path.toUri().getPath()); + } + normalizedPaths.put(onefile, path); + } + return path; + } + + protected String getNominalPath(Path fpath) { + String nominal = null; + boolean schemaless = fpath.toUri().getScheme() == null; + for (String onefile : conf.getPathToAliases().keySet()) { + Path onepath = normalizePath(onefile, schemaless); + Path curfpath = fpath; + if(!schemaless && onepath.toUri().getScheme() == null) { + curfpath = new Path(fpath.toUri().getPath()); + } + // check for the operators who will process rows coming to this Map Operator + if (onepath.toUri().relativize(curfpath.toUri()).equals(curfpath.toUri())) { + // not from this + continue; + } + if (nominal != null) { + throw new IllegalStateException("Ambiguous input path " + fpath); + } + nominal = onefile; + } + if (nominal == null) { + throw new IllegalStateException("Invalid input path " + fpath); + } + return nominal; + } + + public abstract void initEmptyInputChildren(List> children, Configuration hconf) + throws SerDeException, Exception; + + + /** Kryo ctor. 
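+ * Used only when Kryo re-creates the operator from the serialized plan; fields are populated afterwards.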
*/ + protected AbstractMapOperator() { + super(); + } + + public AbstractMapOperator(CompilationOpContext ctx) { + super(ctx); + } + + public abstract void setChildren(Configuration hconf) throws Exception; + + + public void initializeMapOperator(Configuration hconf) throws HiveException { + // set that parent initialization is done and call initialize on children + state = State.INIT; + + statsMap.put(Counter.DESERIALIZE_ERRORS.toString(), deserialize_error_count); + + numRows = 0; + + String context = hconf.get(Operator.CONTEXT_NAME_KEY, ""); + if (context != null && !context.isEmpty()) { + context = "_" + context.replace(" ","_"); + } + statsMap.put(Counter.RECORDS_IN + context, recordCounter); + } + + public abstract void initializeContexts() throws HiveException; + + public abstract Deserializer getCurrentDeserializer(); + + public abstract void process(Writable value) throws HiveException; + + @Override + public void closeOp(boolean abort) throws HiveException { + recordCounter.set(numRows); + super.closeOp(abort); + } + + public void clearConnectedOperators() { + connectedOperators.clear(); + } + + public void setConnectedOperators(int tag, DummyStoreOperator dummyOp) { + connectedOperators.put(tag, dummyOp); + } + + public Map getConnectedOperators() { + return connectedOperators; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index 2b690f8..681af8d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -18,10 +18,8 @@ package org.apache.hadoop.hive.ql.exec; -import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; @@ -30,9 +28,6 @@ import java.util.Map.Entry; import java.util.Properties; import java.util.Set; -import java.util.TreeMap; -import java.util.concurrent.Future; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; @@ -42,7 +37,6 @@ import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.RecordIdentifier; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -59,14 +53,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.StringUtils; @@ -78,25 +70,11 @@ * Writable data structure from a Table (instead of a Hive Object). 
**/ @SuppressWarnings("deprecation") -public class MapOperator extends Operator implements Serializable, Cloneable { +public class MapOperator extends AbstractMapOperator { private static final long serialVersionUID = 1L; - /** - * Counter. - * - */ - public static enum Counter { - DESERIALIZE_ERRORS, - RECORDS_IN - } - - private final transient LongWritable deserialize_error_count = new LongWritable(); - private final transient LongWritable recordCounter = new LongWritable(); - protected transient long numRows = 0; protected transient long cntr = 1; - private final Map connectedOperators - = new TreeMap(); protected transient long logEveryNRows = 0; // input path --> {operator --> context} @@ -108,7 +86,6 @@ // context for current input file protected transient MapOpCtx[] currentCtxs; - private transient final Map normalizedPaths = new HashMap(); protected static class MapOpCtx { @@ -439,31 +416,6 @@ private void initOperatorContext(List> children } } - private String getNominalPath(Path fpath) { - String nominal = null; - boolean schemaless = fpath.toUri().getScheme() == null; - for (String onefile : conf.getPathToAliases().keySet()) { - Path onepath = normalizePath(onefile, schemaless); - Path curfpath = fpath; - if(!schemaless && onepath.toUri().getScheme() == null) { - curfpath = new Path(fpath.toUri().getPath()); - } - // check for the operators who will process rows coming to this Map Operator - if (onepath.toUri().relativize(curfpath.toUri()).equals(curfpath.toUri())) { - // not from this - continue; - } - if (nominal != null) { - throw new IllegalStateException("Ambiguous input path " + fpath); - } - nominal = onefile; - } - if (nominal == null) { - throw new IllegalStateException("Invalid input path " + fpath); - } - return nominal; - } - /** Kryo ctor. 
*/ protected MapOperator() { super(); @@ -479,32 +431,17 @@ public void initializeOp(Configuration hconf) throws HiveException { } public void initializeMapOperator(Configuration hconf) throws HiveException { - // set that parent initialization is done and call initialize on children - state = State.INIT; - statsMap.put(Counter.DESERIALIZE_ERRORS.toString(), deserialize_error_count); + super.initializeMapOperator(hconf); - numRows = 0; cntr = 1; logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS); - String context = hconf.get(Operator.CONTEXT_NAME_KEY, ""); - if (context != null && !context.isEmpty()) { - context = "_" + context.replace(" ","_"); - } - statsMap.put(Counter.RECORDS_IN + context, recordCounter); - for (Entry, StructObjectInspector> entry : childrenOpToOI.entrySet()) { Operator child = entry.getKey(); child.initialize(hconf, new ObjectInspector[] {entry.getValue()}); } } - @Override - public void closeOp(boolean abort) throws HiveException { - recordCounter.set(numRows); - super.closeOp(abort); - } - // Find context for current input file @Override public void cleanUpInputFileChangedOp() throws HiveException { @@ -534,20 +471,6 @@ public void cleanUpInputFileChangedOp() throws HiveException { currentCtxs = contexts.values().toArray(new MapOpCtx[contexts.size()]); } - private Path normalizePath(String onefile, boolean schemaless) { - //creating Path is expensive, so cache the corresponding - //Path object in normalizedPaths - Path path = normalizedPaths.get(onefile); - if (path == null) { - path = new Path(onefile); - if (schemaless && path.toUri().getScheme() != null) { - path = new Path(path.toUri().getPath()); - } - normalizedPaths.put(onefile, path); - } - return path; - } - public void process(Writable value) throws HiveException { // A mapper can span multiple files/partitions. // The serializers need to be reset if the input file changed @@ -704,16 +627,4 @@ public Deserializer getCurrentDeserializer() { return currentCtxs[0].deserializer; } - - public void clearConnectedOperators() { - connectedOperators.clear(); - } - - public void setConnectedOperators(int tag, DummyStoreOperator dummyOp) { - connectedOperators.put(tag, dummyOp); - } - - public Map getConnectedOperators() { - return connectedOperators; - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 5e0553d..1d4eb11 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -215,7 +215,7 @@ public static final String MAPRED_MAPPER_CLASS = "mapred.mapper.class"; public static final String MAPRED_REDUCER_CLASS = "mapred.reducer.class"; public static final String HIVE_ADDED_JARS = "hive.added.jars"; - public static final String VECTOR_MODE = "VECTOR_MODE"; + public static final String USE_VECTORIZED_INPUT_FILE_FORMAT = "USE_VECTORIZED_INPUT_FILE_FORMAT"; public static String MAPNAME = "Map "; public static String REDUCENAME = "Reducer "; @@ -3254,21 +3254,27 @@ private static void resetUmaskInConf(Configuration conf, boolean unsetUmask, Str * and vectorization is allowed. The plan may be configured for vectorization * but vectorization disallowed eg. for FetchOperator execution. 
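 * When USE_VECTORIZED_INPUT_FILE_FORMAT is set explicitly on the configuration (as DagUtils does before Tez split generation), that value wins; otherwise hive.vectorized.execution.enabled and the MapWork flag are consulted.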
*/ - public static boolean isVectorMode(Configuration conf) { - if (conf.get(VECTOR_MODE) != null) { + public static boolean getUseVectorizedInputFileFormat(Configuration conf) { + if (conf.get(USE_VECTORIZED_INPUT_FILE_FORMAT) != null) { // this code path is necessary, because with HS2 and client // side split generation we end up not finding the map work. // This is because of thread local madness (tez split // generation is multi-threaded - HS2 plan cache uses thread // locals). - return conf.getBoolean(VECTOR_MODE, false); + return conf.getBoolean(USE_VECTORIZED_INPUT_FILE_FORMAT, false); } else { return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) && Utilities.getPlanPath(conf) != null - && Utilities.getMapWork(conf).getVectorMode(); + && Utilities.getMapWork(conf).getUseVectorizedInputFileFormat(); } } + + public static boolean getUseVectorizedInputFileFormat(Configuration conf, MapWork mapWork) { + return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) + && mapWork.getUseVectorizedInputFileFormat(); + } + /** * @param conf * @return the configured VectorizedRowBatchCtx for a MapWork task. @@ -3285,11 +3291,6 @@ public static VectorizedRowBatchCtx getVectorizedRowBatchCtx(Configuration conf) return result; } - public static boolean isVectorMode(Configuration conf, MapWork mapWork) { - return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) - && mapWork.getVectorMode(); - } - public static void clearWorkMapForConf(Configuration conf) { // Remove cached query plans for the current query only Path mapPath = getPlanPath(conf, MAP_PLAN_NAME); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java index c34dd1f..f90a788 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java @@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.AbstractMapOperator; import org.apache.hadoop.hive.ql.exec.MapOperator; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.Operator; @@ -59,7 +60,7 @@ */ public class ExecMapper extends MapReduceBase implements Mapper { - private MapOperator mo; + private AbstractMapOperator mo; private OutputCollector oc; private JobConf jc; private boolean abort = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java index d8fe35f..48dfedc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java @@ -25,6 +25,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.AbstractMapOperator; import org.apache.hadoop.hive.ql.exec.MapOperator; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.Operator; @@ -55,7 +56,7 @@ */ public class SparkMapRecordHandler extends SparkRecordHandler { private static final Logger LOG = LoggerFactory.getLogger(SparkMapRecordHandler.class); - private MapOperator mo; + private AbstractMapOperator mo; private MapredLocalWork localWork = null; private boolean isLogInfoEnabled = false; 
private ExecMapperContext execContext; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index a1b7445..47a80ec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -630,9 +630,9 @@ private Vertex createVertex(JobConf conf, MapWork mapWork, // generation we end up not finding the map work. This is // because of thread local madness (tez split generation is // multi-threaded - HS2 plan cache uses thread locals). Setting - // VECTOR_MODE causes the split gen code to use the conf instead + // USE_VECTORIZED_INPUT_FILE_FORMAT causes the split gen code to use the conf instead // of the map work. - conf.setBoolean(Utilities.VECTOR_MODE, mapWork.getVectorMode()); + conf.setBoolean(Utilities.USE_VECTORIZED_INPUT_FILE_FORMAT, mapWork.getUseVectorizedInputFileFormat()); dataSource = MRInputHelpers.configureMRInputWithLegacySplitGeneration(conf, new Path(tezDir, "split_" + mapWork.getName().replaceAll(" ", "_")), true); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java index 0584ad8..9a9f43a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java @@ -33,6 +33,7 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.AbstractMapOperator; import org.apache.hadoop.hive.llap.io.api.LlapProxy; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; @@ -75,8 +76,8 @@ public static final Logger l4j = LoggerFactory.getLogger(MapRecordProcessor.class); protected static final String MAP_PLAN_KEY = "__MAP_PLAN__"; - private MapOperator mapOp; - private final List mergeMapOpList = new ArrayList(); + private AbstractMapOperator mapOp; + private final List mergeMapOpList = new ArrayList(); private MapRecordSource[] sources; private final Map multiMRInputMap = new HashMap(); private int position; @@ -183,7 +184,7 @@ public Object call() { boolean fromCache = false; if (mergeWorkList != null) { - MapOperator mergeMapOp = null; + AbstractMapOperator mergeMapOp = null; for (BaseWork mergeWork : mergeWorkList) { MapWork mergeMapWork = (MapWork) mergeWork; if (mergeMapWork.getVectorMode()) { @@ -261,7 +262,7 @@ public Object call() { initializeMapRecordSources(); mapOp.initializeMapOperator(jconf); if ((mergeMapOpList != null) && mergeMapOpList.isEmpty() == false) { - for (MapOperator mergeMapOp : mergeMapOpList) { + for (AbstractMapOperator mergeMapOp : mergeMapOpList) { jconf.set(Utilities.INPUT_NAME, mergeMapOp.getConf().getName()); mergeMapOp.initializeMapOperator(jconf); } @@ -309,7 +310,7 @@ private void initializeMapRecordSources() throws Exception { reader = legacyMRInput.getReader(); } sources[position].init(jconf, mapOp, reader); - for (MapOperator mapOp : mergeMapOpList) { + for (AbstractMapOperator mapOp : mergeMapOpList) { int tag = mapOp.getConf().getTag(); sources[tag] = new MapRecordSource(); String inputName = mapOp.getConf().getName(); @@ -326,7 +327,7 @@ private void initializeMapRecordSources() throws Exception { @SuppressWarnings("deprecation") private KeyValueReader getKeyValueReader(Collection keyValueReaders, - MapOperator mapOp) + AbstractMapOperator mapOp) throws 
Exception { List kvReaderList = new ArrayList(keyValueReaders); // this sets up the map operator contexts correctly @@ -394,7 +395,7 @@ void close(){ } mapOp.close(abort); if (mergeMapOpList.isEmpty() == false) { - for (MapOperator mergeMapOp : mergeMapOpList) { + for (AbstractMapOperator mergeMapOp : mergeMapOpList) { mergeMapOp.close(abort); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java index b53c933..add7d08 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.ql.exec.MapOperator; +import org.apache.hadoop.hive.ql.exec.AbstractMapOperator; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.Writable; @@ -39,11 +39,11 @@ public static final Logger LOG = LoggerFactory.getLogger(MapRecordSource.class); private ExecMapperContext execContext = null; - private MapOperator mapOp = null; + private AbstractMapOperator mapOp = null; private KeyValueReader reader = null; private final boolean grouped = false; - void init(JobConf jconf, MapOperator mapOp, KeyValueReader reader) throws IOException { + void init(JobConf jconf, AbstractMapOperator mapOp, KeyValueReader reader) throws IOException { execContext = mapOp.getExecContext(); this.mapOp = mapOp; this.reader = reader; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index d69454f..9f0fb67 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.sql.Date; import java.sql.Timestamp; + +import java.util.ArrayList; import java.util.List; import org.slf4j.Logger; @@ -40,10 +43,20 @@ import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; @@ -180,6 +193,8 @@ void assign(int batchIndex, Object object) { } } + static int fake = 0; + private class IntAssigner extends AbstractLongAssigner { IntAssigner(int columnIndex) { @@ -191,6 +206,9 @@ void assign(int batchIndex, Object object) { if (object == null) { VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); } else { + if (!(object instanceof IntWritable)) { + fake++; + } IntWritable iw = (IntWritable) object; vector[batchIndex] = iw.get(); colVector.isNull[batchIndex] = false; @@ -456,11 +474,11 @@ void assign(int batchIndex, Object object) { } } - private class DecimalAssigner extends Assigner { + private abstract class AbstractDecimalAssigner extends Assigner { protected DecimalColumnVector colVector; - DecimalAssigner(int columnIndex) { + AbstractDecimalAssigner(int columnIndex) { super(columnIndex); } @@ -473,6 +491,13 @@ void setColumnVector(VectorizedRowBatch batch) { void forgetColumnVector() { colVector = null; } + } + + private class DecimalAssigner extends AbstractDecimalAssigner { + + DecimalAssigner(int columnIndex) { + super(columnIndex); + } @Override void assign(int batchIndex, Object object) { @@ -489,6 +514,590 @@ void assign(int batchIndex, Object object) { } } + //------------------------------------------------------------------------------------------------ + + protected class ConvertToVoidAssigner extends Assigner { + + ConvertToVoidAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + } + + @Override + void forgetColumnVector() { + } + + @Override + void assign(int batchIndex, Object object) { + // No-op. + } + } + + protected abstract class AbstractConvertToLongAssigner extends AbstractLongAssigner { + + protected final PrimitiveObjectInspector sourcePrimitiveObjectInspector; + + AbstractConvertToLongAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex); + this.sourcePrimitiveObjectInspector = sourcePrimitiveObjectInspector; + } + } + + protected class ConvertToBooleanAssigner extends AbstractConvertToLongAssigner { + + ConvertToBooleanAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + vector[batchIndex] = + (PrimitiveObjectInspectorUtils.getBoolean( + object, sourcePrimitiveObjectInspector) ? 
1 : 0); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class ConvertToByteAssigner extends AbstractConvertToLongAssigner { + + ConvertToByteAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + vector[batchIndex] = + PrimitiveObjectInspectorUtils.getByte( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class ConvertToShortAssigner extends AbstractConvertToLongAssigner { + + ConvertToShortAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + vector[batchIndex] = + PrimitiveObjectInspectorUtils.getShort( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class ConvertToIntAssigner extends AbstractConvertToLongAssigner { + + ConvertToIntAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + vector[batchIndex] = + PrimitiveObjectInspectorUtils.getInt( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class ConvertToLongAssigner extends AbstractConvertToLongAssigner { + + ConvertToLongAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + vector[batchIndex] = + PrimitiveObjectInspectorUtils.getLong( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class ConvertToDateAssigner extends AbstractConvertToLongAssigner { + + DateWritable dateWritable; + + ConvertToDateAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + dateWritable = new DateWritable(0); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + Date date = PrimitiveObjectInspectorUtils.getDate( + object, sourcePrimitiveObjectInspector); + if (date == null) { + 
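+ // getDate() yields null when the source value cannot be interpreted as a date (for example an unparseable string), so mark the entry NULL.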
VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + dateWritable.set(date); + vector[batchIndex] = dateWritable.getDays(); + } + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class ConvertToTimestampAssigner extends AbstractConvertToLongAssigner { + + ConvertToTimestampAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + Timestamp timestamp = + PrimitiveObjectInspectorUtils.getTimestamp( + object, sourcePrimitiveObjectInspector); + if (timestamp == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + vector[batchIndex] = TimestampUtils.getTimeNanoSec(timestamp); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class ConvertToIntervalYearMonthAssigner extends AbstractConvertToLongAssigner { + + ConvertToIntervalYearMonthAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + HiveIntervalYearMonth intervalYearMonth = + PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth( + object, sourcePrimitiveObjectInspector); + if (intervalYearMonth == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + vector[batchIndex] = intervalYearMonth.getTotalMonths(); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class ConvertToIntervalDayTimeAssigner extends AbstractConvertToLongAssigner { + + ConvertToIntervalDayTimeAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + HiveIntervalDayTime intervalDayTime = + PrimitiveObjectInspectorUtils.getHiveIntervalDayTime( + object, sourcePrimitiveObjectInspector); + if (intervalDayTime == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + vector[batchIndex] = DateUtils.getIntervalDayTimeTotalNanos(intervalDayTime); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected abstract class AbstractConvertToDoubleAssigner extends AbstractDoubleAssigner { + + protected final PrimitiveObjectInspector sourcePrimitiveObjectInspector; + + AbstractConvertToDoubleAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex); + this.sourcePrimitiveObjectInspector = sourcePrimitiveObjectInspector; + } + } + + protected class ConvertToFloatAssigner extends AbstractConvertToDoubleAssigner { + + 
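+ // Float sources are widened to double; vectorized batches keep both FLOAT and DOUBLE columns in a DoubleColumnVector.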
ConvertToFloatAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + vector[batchIndex] = + PrimitiveObjectInspectorUtils.getFloat( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class ConvertToDoubleAssigner extends AbstractConvertToDoubleAssigner { + + ConvertToDoubleAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + vector[batchIndex] = + PrimitiveObjectInspectorUtils.getDouble( + object, sourcePrimitiveObjectInspector); + colVector.isNull[batchIndex] = false; + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected abstract class AbstractConvertToBytesAssigner extends AbstractBytesAssigner { + + protected final PrimitiveObjectInspector sourcePrimitiveObjectInspector; + + AbstractConvertToBytesAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex); + this.sourcePrimitiveObjectInspector = sourcePrimitiveObjectInspector; + } + } + + protected class ConvertToBinaryAssigner extends AbstractConvertToBytesAssigner { + + ConvertToBinaryAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + BytesWritable bytesWritable = + PrimitiveObjectInspectorUtils.getBinary( + object, sourcePrimitiveObjectInspector); + if (bytesWritable == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + colVector.setVal(batchIndex, bytesWritable.getBytes(), 0, bytesWritable.getLength()); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class ConvertToStringAssigner extends AbstractConvertToBytesAssigner { + + private final Text text; + + ConvertToStringAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + text = new Text(); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + String string = PrimitiveObjectInspectorUtils.getString( + object, sourcePrimitiveObjectInspector); + if (string == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + text.set(string); + colVector.setVal(batchIndex, text.getBytes(), 0, text.getLength()); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class 
ConvertToVarCharAssigner extends AbstractConvertToBytesAssigner { + + ConvertToVarCharAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + HiveVarchar hiveVarchar = + PrimitiveObjectInspectorUtils.getHiveVarchar( + object, sourcePrimitiveObjectInspector); + if (hiveVarchar == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + byte[] bytes = hiveVarchar.getValue().getBytes(); + + // UNDONE: Trim to target maximum length. + + colVector.setVal(batchIndex, bytes, 0, bytes.length); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected class ConvertToCharAssigner extends AbstractConvertToBytesAssigner { + + ConvertToCharAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + HiveChar hiveChar = + PrimitiveObjectInspectorUtils.getHiveChar( + object, sourcePrimitiveObjectInspector); + if (hiveChar == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + byte[] bytes = hiveChar.getValue().getBytes(); + + // UNDONE: Trim to target maximum length. + + colVector.setVal(batchIndex, bytes, 0, bytes.length); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + protected abstract class AbstractConvertToDecimalAssigner extends AbstractDecimalAssigner { + + protected final PrimitiveObjectInspector sourcePrimitiveObjectInspector; + + AbstractConvertToDecimalAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex); + this.sourcePrimitiveObjectInspector = sourcePrimitiveObjectInspector; + } + } + + protected class ConvertToDecimalAssigner extends AbstractConvertToDecimalAssigner { + + ConvertToDecimalAssigner(int columnIndex, + PrimitiveObjectInspector sourcePrimitiveObjectInspector) { + super(columnIndex, sourcePrimitiveObjectInspector); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + try { + HiveDecimal hiveDecimal = + PrimitiveObjectInspectorUtils.getHiveDecimal( + object, sourcePrimitiveObjectInspector); + if (hiveDecimal == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + colVector.set(batchIndex, hiveDecimal); + colVector.isNull[batchIndex] = false; + } + } catch (NumberFormatException e) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } + } + } + } + + private Assigner createConversionAssigner(TypeInfo sourceTypeInfo, + TypeInfo targetTypeInfo, int columnIndex) throws HiveException { + + PrimitiveObjectInspector sourcePrimitiveObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) sourceTypeInfo); + + PrimitiveTypeInfo targetPrimitiveTypeInfo = (PrimitiveTypeInfo) targetTypeInfo; + 
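+ // Only the target primitive category drives which converter is chosen below; the source value is read generically through its writable ObjectInspector.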
PrimitiveCategory targetPrimitiveCategory = + targetPrimitiveTypeInfo.getPrimitiveCategory(); + + Assigner assigner = null; + switch (targetPrimitiveCategory) { + case VOID: + assigner = new ConvertToVoidAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case BOOLEAN: + assigner = new ConvertToBooleanAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case BYTE: + assigner = new ConvertToByteAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case SHORT: + assigner = new ConvertToShortAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case INT: + assigner = new ConvertToIntAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case LONG: + assigner = new ConvertToLongAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case TIMESTAMP: + assigner = new ConvertToTimestampAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case DATE: + assigner = new ConvertToDateAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case FLOAT: + assigner = new ConvertToFloatAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case DOUBLE: + assigner = new ConvertToDoubleAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case BINARY: + assigner = new ConvertToBinaryAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case STRING: + assigner = new ConvertToStringAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case VARCHAR: + assigner = new ConvertToVarCharAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case CHAR: + assigner = new ConvertToCharAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case DECIMAL: + assigner = new ConvertToDecimalAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case INTERVAL_YEAR_MONTH: + assigner = new ConvertToIntervalYearMonthAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + case INTERVAL_DAY_TIME: + assigner = new ConvertToIntervalDayTimeAssigner(columnIndex, sourcePrimitiveObjectInspector); + break; + default: + throw new HiveException("No vector row assigner for target primitive category " + + targetPrimitiveCategory); + } + + return assigner; + } + + //------------------------------------------------------------------------------------------------ + private Assigner createAssigner(PrimitiveTypeInfo primitiveTypeInfo, int columnIndex) throws HiveException { PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); Assigner assigner; @@ -552,11 +1161,13 @@ private Assigner createAssigner(PrimitiveTypeInfo primitiveTypeInfo, int columnI } Assigner[] assigners; + PrimitiveTypeInfo[] primitiveTypeInfos; public void init(StructObjectInspector structObjectInspector, List projectedColumns) throws HiveException { List fields = structObjectInspector.getAllStructFieldRefs(); assigners = new Assigner[fields.size()]; + primitiveTypeInfos = new PrimitiveTypeInfo[fields.size()]; int i = 0; for (StructField field : fields) { @@ -565,18 +1176,87 @@ public void init(StructObjectInspector structObjectInspector, List proj PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString( fieldInspector.getTypeName()); assigners[i] = createAssigner(primitiveTypeInfo, columnIndex); + primitiveTypeInfos[i] = primitiveTypeInfo; i++; } } + public void init(StructObjectInspector structObjectInspector) throws HiveException { + + List fields = structObjectInspector.getAllStructFieldRefs(); + assigners = new 
Assigner[fields.size()]; + primitiveTypeInfos = new PrimitiveTypeInfo[fields.size()]; + + int i = 0; + for (StructField field : fields) { + ObjectInspector fieldInspector = field.getFieldObjectInspector(); + PrimitiveTypeInfo primitiveTypeInfo = + (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString( + fieldInspector.getTypeName()); + assigners[i] = createAssigner(primitiveTypeInfo, i); + primitiveTypeInfos[i] = primitiveTypeInfo; + i++; + } + } + + PrimitiveTypeInfo[] sourcePrimitiveTypeInfos; + + public void init(TypeInfo[] sourceTypeInfos, TypeInfo[] targetTypeInfos, + boolean[] conversionFlags, boolean[] columnsToIncludeTruncated) throws HiveException { + + int columnCount = columnsToIncludeTruncated == null ? + sourceTypeInfos.length : columnsToIncludeTruncated.length; + + assigners = new Assigner[columnCount]; + primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + sourcePrimitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + + for (int i = 0; i < columnCount; i++) { + + Assigner assigner; + PrimitiveTypeInfo targetPrimitiveTypeInfo = null; + PrimitiveTypeInfo sourcePrimitiveTypeInfo = null; + + if (columnsToIncludeTruncated != null && !columnsToIncludeTruncated[i]) { + + // Field not included in query. + assigner = null; + + } else { + TypeInfo targetTypeInfo = targetTypeInfos[i]; + + if (targetTypeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) { + + // For now, we don't have an assigner for complex types... + assigner = null; + } else { + targetPrimitiveTypeInfo = (PrimitiveTypeInfo) targetTypeInfo; + + if (conversionFlags != null && conversionFlags[i]) { + sourcePrimitiveTypeInfo = (PrimitiveTypeInfo) sourceTypeInfos[i]; + assigner = createConversionAssigner(sourcePrimitiveTypeInfo, + targetPrimitiveTypeInfo, i); + } else { + assigner = createAssigner(targetPrimitiveTypeInfo, i); + } + } + } + + assigners[i] = assigner; + primitiveTypeInfos[i] = targetPrimitiveTypeInfo; + sourcePrimitiveTypeInfos[i] = sourcePrimitiveTypeInfo; + } + } public void init(List typeNames) throws HiveException { assigners = new Assigner[typeNames.size()]; + primitiveTypeInfos = new PrimitiveTypeInfo[typeNames.size()]; int i = 0; for (String typeName : typeNames) { PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); assigners[i] = createAssigner(primitiveTypeInfo, i); + primitiveTypeInfos[i] = primitiveTypeInfo; i++; } } @@ -584,17 +1264,21 @@ public void init(List typeNames) throws HiveException { protected void setBatch(VectorizedRowBatch batch) throws HiveException { for (int i = 0; i < assigners.length; i++) { Assigner assigner = assigners[i]; - int columnIndex = assigner.getColumnIndex(); - if (batch.cols[columnIndex] == null) { - throw new HiveException("Unexpected null vector column " + columnIndex); + if (assigner != null) { + int columnIndex = assigner.getColumnIndex(); + if (batch.cols[columnIndex] == null) { + throw new HiveException("Unexpected null vector column " + columnIndex); + } + assigner.setColumnVector(batch); } - assigner.setColumnVector(batch); } } protected void forgetBatch() { for (Assigner assigner : assigners) { - assigner.forgetColumnVector(); + if (assigner != null) { + assigner.forgetColumnVector(); + } } } @@ -605,8 +1289,26 @@ public void assignRowColumn(int batchIndex, int logicalColumnIndex, Object objec public void assignRow(int batchIndex, Object[] objects) { int i = 0; for (Assigner assigner : assigners) { - assigner.assign(batchIndex, objects[i++]); + if (assigner != null) { + 
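+ // A null assigner means the column was not included in the query or has a complex type with no vectorized assigner yet; skip it but still advance the column index.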
assigner.assign(batchIndex, objects[i]); + } + i++; } } + public void assignRow(int batchIndex, Object object, + StructObjectInspector structObjectInspector) { + + /* Convert input row to standard objects. */ + List standardObjects = new ArrayList(); + ObjectInspectorUtils.copyToStandardObject(standardObjects, object, + structObjectInspector, ObjectInspectorCopyOption.WRITABLE); + + for (int i = 0; i < standardObjects.size(); i++) { + Assigner assigner = assigners[i]; + if (assigner != null) { + assigner.assign(batchIndex, standardObjects.get(i)); + } + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index 9b086b8..f9567e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -21,10 +21,12 @@ import java.io.EOFException; import java.io.IOException; import java.sql.Timestamp; +import java.util.HashMap; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -38,6 +40,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hive.common.util.DateUtils; +import com.google.common.base.Preconditions; + /** * This class deserializes a serialization format into a row of a VectorizedRowBatch. * @@ -73,7 +77,7 @@ private VectorDeserializeRow() { private abstract class Reader { protected int columnIndex; - Reader(int columnIndex) { + Reader(TypeInfo typeInfo, int columnIndex) { this.columnIndex = columnIndex; } @@ -82,15 +86,15 @@ private VectorDeserializeRow() { private abstract class AbstractLongReader extends Reader { - AbstractLongReader(int columnIndex) { - super(columnIndex); + AbstractLongReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } } private class BooleanReader extends AbstractLongReader { - BooleanReader(int columnIndex) { - super(columnIndex); + BooleanReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override @@ -109,8 +113,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private class ByteReader extends AbstractLongReader { - ByteReader(int columnIndex) { - super(columnIndex); + ByteReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override @@ -129,8 +133,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private class ShortReader extends AbstractLongReader { - ShortReader(int columnIndex) { - super(columnIndex); + ShortReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override @@ -149,8 +153,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private class IntReader extends AbstractLongReader { - IntReader(int columnIndex) { - super(columnIndex); + IntReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override @@ -169,8 +173,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private class LongReader extends AbstractLongReader { - LongReader(int columnIndex) { - super(columnIndex); + LongReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override @@ -191,8 +195,8 @@ void 
apply(VectorizedRowBatch batch, int batchIndex) throws IOException { DeserializeRead.ReadDateResults readDateResults; - DateReader(int columnIndex) { - super(columnIndex); + DateReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readDateResults = deserializeRead.createReadDateResults(); } @@ -214,8 +218,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { DeserializeRead.ReadTimestampResults readTimestampResults; - TimestampReader(int columnIndex) { - super(columnIndex); + TimestampReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readTimestampResults = deserializeRead.createReadTimestampResults(); } @@ -238,8 +242,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { DeserializeRead.ReadIntervalYearMonthResults readIntervalYearMonthResults; - IntervalYearMonthReader(int columnIndex) { - super(columnIndex); + IntervalYearMonthReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); } @@ -262,8 +266,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { DeserializeRead.ReadIntervalDayTimeResults readIntervalDayTimeResults; - IntervalDayTimeReader(int columnIndex) { - super(columnIndex); + IntervalDayTimeReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readIntervalDayTimeResults = deserializeRead.createReadIntervalDayTimeResults(); } @@ -284,15 +288,15 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private abstract class AbstractDoubleReader extends Reader { - AbstractDoubleReader(int columnIndex) { - super(columnIndex); + AbstractDoubleReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } } private class FloatReader extends AbstractDoubleReader { - FloatReader(int columnIndex) { - super(columnIndex); + FloatReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override @@ -311,8 +315,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private class DoubleReader extends AbstractDoubleReader { - DoubleReader(int columnIndex) { - super(columnIndex); + DoubleReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } @Override @@ -331,8 +335,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private abstract class AbstractBytesReader extends Reader { - AbstractBytesReader(int columnIndex) { - super(columnIndex); + AbstractBytesReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); } } @@ -340,8 +344,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private DeserializeRead.ReadStringResults readStringResults; - StringReaderByValue(int columnIndex) { - super(columnIndex); + StringReaderByValue(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); } @@ -364,8 +368,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private DeserializeRead.ReadStringResults readStringResults; - StringReaderByReference(int columnIndex) { - super(columnIndex); + StringReaderByReference(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); } @@ -390,10 +394,10 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private CharTypeInfo charTypeInfo; - 
CharReaderByValue(CharTypeInfo charTypeInfo, int columnIndex) { - super(columnIndex); + CharReaderByValue(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); - this.charTypeInfo = charTypeInfo; + this.charTypeInfo = (CharTypeInfo) typeInfo; } @Override @@ -420,10 +424,10 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private CharTypeInfo charTypeInfo; - CharReaderByReference(CharTypeInfo charTypeInfo, int columnIndex) { - super(columnIndex); + CharReaderByReference(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); - this.charTypeInfo = charTypeInfo; + this.charTypeInfo = (CharTypeInfo) typeInfo; } @Override @@ -450,10 +454,10 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private VarcharTypeInfo varcharTypeInfo; - VarcharReaderByValue(VarcharTypeInfo varcharTypeInfo, int columnIndex) { - super(columnIndex); + VarcharReaderByValue(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); - this.varcharTypeInfo = varcharTypeInfo; + this.varcharTypeInfo = (VarcharTypeInfo) typeInfo; } @Override @@ -480,10 +484,10 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private VarcharTypeInfo varcharTypeInfo; - VarcharReaderByReference(VarcharTypeInfo varcharTypeInfo, int columnIndex) { - super(columnIndex); + VarcharReaderByReference(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readStringResults = deserializeRead.createReadStringResults(); - this.varcharTypeInfo = varcharTypeInfo; + this.varcharTypeInfo = (VarcharTypeInfo) typeInfo; } @Override @@ -508,8 +512,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private DeserializeRead.ReadBinaryResults readBinaryResults; - BinaryReaderByValue(int columnIndex) { - super(columnIndex); + BinaryReaderByValue(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readBinaryResults = deserializeRead.createReadBinaryResults(); } @@ -532,8 +536,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private DeserializeRead.ReadBinaryResults readBinaryResults; - BinaryReaderByReference(int columnIndex) { - super(columnIndex); + BinaryReaderByReference(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readBinaryResults = deserializeRead.createReadBinaryResults(); } @@ -556,8 +560,8 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { private DeserializeRead.ReadDecimalResults readDecimalResults; - HiveDecimalReader(int columnIndex) { - super(columnIndex); + HiveDecimalReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); readDecimalResults = deserializeRead.createReadDecimalResults(); } @@ -576,73 +580,149 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { } } + private class NotIncludedColumnReader extends Reader { + + NotIncludedColumnReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + + if (deserializeRead.readCheckNull()) { + // Ignore not included column. 
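+        // readCheckNull() is still invoked here so the deserializer steps past the excluded
+        // field and stays aligned for the readers that follow; columns switched off through
+        // setColumnsToInclude are presumably surfaced as NULL, which is why any non-NULL
+        // value is treated as an error below.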
+ } else { + throw new RuntimeException("Expected a NULL for not included column"); + } + } + } + + + //------------------------------------------------------------------------------------------------ + + private class IntToDoubleReader extends AbstractDoubleReader { + + IntToDoubleReader(TypeInfo typeInfo, int columnIndex) { + super(typeInfo, columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + int value = deserializeRead.readInt(); + colVector.vector[batchIndex] = (double) value; + } + } + } + + // Currently, we only support these no-precision-loss or promotion data type conversions: + // + // And, all of them stay within the vector column type (Long, Double, Bytes, Decimal) + // for now. + // + // Short -> Int IMPLICIT WITH VECTORIZATION + // Short -> BigInt IMPLICIT WITH VECTORIZATION + // Int --> BigInt IMPLICIT WITH VECTORIZATION + // + // CONSIDER: + // Float -> Double IMPLICIT WITH VECTORIZATION + // (Char | VarChar) -> String IMPLICIT WITH VECTORIZATION + // + private void addConversionReader(TypeInfo targetTypeInfo, int index, + int outputColumn) throws HiveException { + + + TypeInfo typeInfo = typeInfos[index]; + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + + PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfo).getPrimitiveCategory(); + + Reader reader = null; + if (primitiveCategory == PrimitiveCategory.SHORT && + targetPrimitiveCategory == PrimitiveCategory.INT) { + reader = new ShortReader(typeInfo, outputColumn); + } else if (primitiveCategory == PrimitiveCategory.SHORT && + targetPrimitiveCategory == PrimitiveCategory.LONG) { + reader = new ShortReader(typeInfo, outputColumn); + } else if (primitiveCategory == PrimitiveCategory.INT && + targetPrimitiveCategory == PrimitiveCategory.LONG) { + reader = new IntReader(typeInfo, outputColumn); + } else { + throw new HiveException("No conversion from primitive type category " + + primitiveCategory.name() + " to " + targetPrimitiveCategory.name()); + } + + readersByValue[index] = reader; + readersByReference[index] = reader; + } + + //------------------------------------------------------------------------------------------------ + private void addReader(int index, int outputColumn) throws HiveException { Reader readerByValue = null; Reader readerByReference = null; - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfos[index]; - PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + TypeInfo typeInfo = typeInfos[index]; + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); switch (primitiveCategory) { // case VOID: // UNDONE: // break; case BOOLEAN: - readerByValue = new BooleanReader(outputColumn); + readerByValue = new BooleanReader(typeInfo, outputColumn); break; case BYTE: - readerByValue = new ByteReader(outputColumn); + readerByValue = new ByteReader(typeInfo, outputColumn); break; case SHORT: - readerByValue = new ShortReader(outputColumn); + readerByValue = new ShortReader(typeInfo, outputColumn); break; case INT: - readerByValue = new IntReader(outputColumn); + readerByValue = new IntReader(typeInfo, outputColumn); break; case LONG: - readerByValue = new LongReader(outputColumn); + readerByValue = new 
LongReader(typeInfo, outputColumn); break; case DATE: - readerByValue = new DateReader(outputColumn); + readerByValue = new DateReader(typeInfo, outputColumn); break; case TIMESTAMP: - readerByValue = new TimestampReader(outputColumn); + readerByValue = new TimestampReader(typeInfo, outputColumn); break; case FLOAT: - readerByValue = new FloatReader(outputColumn); + readerByValue = new FloatReader(typeInfo, outputColumn); break; case DOUBLE: - readerByValue = new DoubleReader(outputColumn); + readerByValue = new DoubleReader(typeInfo, outputColumn); break; case STRING: - readerByValue = new StringReaderByValue(outputColumn); - readerByReference = new StringReaderByReference(outputColumn); + readerByValue = new StringReaderByValue(typeInfo, outputColumn); + readerByReference = new StringReaderByReference(typeInfo, outputColumn); break; case CHAR: - { - CharTypeInfo charTypeInfo = (CharTypeInfo) primitiveTypeInfo; - readerByValue = new CharReaderByValue(charTypeInfo, outputColumn); - readerByReference = new CharReaderByReference(charTypeInfo, outputColumn); - } + readerByValue = new CharReaderByValue(typeInfo, outputColumn); + readerByReference = new CharReaderByReference(typeInfo, outputColumn); break; case VARCHAR: - { - VarcharTypeInfo varcharTypeInfo = (VarcharTypeInfo) primitiveTypeInfo; - readerByValue = new VarcharReaderByValue(varcharTypeInfo, outputColumn); - readerByReference = new VarcharReaderByReference(varcharTypeInfo, outputColumn); - } + readerByValue = new VarcharReaderByValue(typeInfo, outputColumn); + readerByReference = new VarcharReaderByReference(typeInfo, outputColumn); break; case BINARY: - readerByValue = new BinaryReaderByValue(outputColumn); - readerByReference = new BinaryReaderByReference(outputColumn); + readerByValue = new BinaryReaderByValue(typeInfo, outputColumn); + readerByReference = new BinaryReaderByReference(typeInfo, outputColumn); break; case DECIMAL: - readerByValue = new HiveDecimalReader(outputColumn); + readerByValue = new HiveDecimalReader(typeInfo, outputColumn); break; case INTERVAL_YEAR_MONTH: - readerByValue = new IntervalYearMonthReader(outputColumn); + readerByValue = new IntervalYearMonthReader(typeInfo, outputColumn); break; case INTERVAL_DAY_TIME: - readerByValue = new IntervalDayTimeReader(outputColumn); + readerByValue = new IntervalDayTimeReader(typeInfo, outputColumn); break; default: throw new HiveException("Unexpected primitive type category " + primitiveCategory); @@ -689,6 +769,74 @@ public void init(int startColumn) throws HiveException { } } + public void init(boolean[] columnsToIncludeTruncated) throws HiveException { + + if (columnsToIncludeTruncated != null) { + deserializeRead.setColumnsToInclude(columnsToIncludeTruncated); + } + + final int columnCount = (columnsToIncludeTruncated == null ? + typeInfos.length : columnsToIncludeTruncated.length); + + readersByValue = new Reader[columnCount]; + readersByReference = new Reader[columnCount]; + + for (int i = 0; i < columnCount; i++) { + + if (columnsToIncludeTruncated != null && !columnsToIncludeTruncated[i]) { + + // Field not included in query. 
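+      // The same NotIncludedColumnReader instance is installed in both the by-value and
+      // by-reference slots, so the per-row deserialize loop can treat every column uniformly
+      // and simply skip over the ones the query does not read.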
+ + Reader notIncludedColumnReader = new NotIncludedColumnReader(null, i); + readersByValue[i] = notIncludedColumnReader; + readersByReference[i] = notIncludedColumnReader; + + } else { + + addReader(i, i); + + } + } + } + + public void init(TypeInfo[] targetTypeInfos, boolean[] conversionFlags, + boolean[] columnsToIncludeTruncated) throws HiveException { + + if (columnsToIncludeTruncated != null) { + deserializeRead.setColumnsToInclude(columnsToIncludeTruncated); + } + + final int columnCount = (columnsToIncludeTruncated == null ? + typeInfos.length : columnsToIncludeTruncated.length); + + readersByValue = new Reader[columnCount]; + readersByReference = new Reader[columnCount]; + + for (int i = 0; i < columnCount; i++) { + + if (columnsToIncludeTruncated != null && !columnsToIncludeTruncated[i]) { + + // Field not included in query. + + Reader notIncludedColumnReader = new NotIncludedColumnReader(null, i); + readersByValue[i] = notIncludedColumnReader; + readersByReference[i] = notIncludedColumnReader; + + } else { + + if (conversionFlags != null && conversionFlags[i]) { + + addConversionReader(targetTypeInfos[i], i, i); + + } else { + + addReader(i, i); + + } + } + } + } + public void init() throws HiveException { init(0); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java index 033be38..0be1f00 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java @@ -19,15 +19,306 @@ package org.apache.hadoop.hive.ql.exec.vector; import org.apache.hadoop.hive.ql.CompilationOpContext; -import org.apache.hadoop.hive.ql.exec.MapOperator; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.AbstractMapOperator; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; +import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import 
org.apache.hadoop.io.BinaryComparable; import org.apache.hadoop.io.Writable; -public class VectorMapOperator extends MapOperator { +public class VectorMapOperator extends AbstractMapOperator { private static final long serialVersionUID = 1L; + private transient HashMap fileToPartitionContextMap; + + private transient Operator oneRootOperator; + + private transient StructObjectInspector tableStructObjectInspector; + + private transient TypeInfo[] tableRowTypeInfos; + + private transient VectorMapOperatorReadType currentReadType; + private transient PartitionContext currentPartContext; + + private transient int currentDataColumnCount; + + private transient DeserializeRead currentDeserializeRead; + private transient VectorDeserializeRow currentVectorDeserializeRow; + + private Deserializer currentPartDeserializer; + private StructObjectInspector currentPartRawRowObjectInspector; + private VectorAssignRowSameBatch currentVectorAssign; + + private transient VectorizedRowBatchCtx batchContext; + private transient VectorizedRowBatch batch; + + private transient int dataColumnCount; + private transient int partitionColumnCount; + private transient Object[] partitionValues; + + private transient boolean[] columnsToIncludeTruncated; + + protected abstract class PartitionContext { + + protected final PartitionDesc partDesc; + + String tableName; + String partName; + + private PartitionContext(PartitionDesc partDesc) { + this.partDesc = partDesc; + + TableDesc td = partDesc.getTableDesc(); + + // Use table properties in case of unpartitioned tables, + // and the union of table properties and partition properties, with partition + // taking precedence, in the case of partitioned tables + Properties overlayedProps = + SerDeUtils.createOverlayedProperties(td.getProperties(), partDesc.getProperties()); + + Map partSpec = partDesc.getPartSpec(); + + tableName = String.valueOf(overlayedProps.getProperty("name")); + partName = String.valueOf(partSpec); + + } + + public PartitionDesc getPartDesc() { + return partDesc; + } + + public abstract void init(Configuration hconf) + throws SerDeException, Exception; + } + + protected class VectorizedInputFileFormatPartitionContext extends PartitionContext { + + private VectorizedInputFileFormatPartitionContext(PartitionDesc partDesc) { + super(partDesc); + } + + public void init(Configuration hconf) { + } + } + + protected class VectorDeserializePartitionContext extends PartitionContext { + + // This helper object deserializes known deserialization / input file format combination into + // columns of a row in a vectorized row batch. + private VectorDeserializeRow vectorDeserializeRow; + + private DeserializeRead deserializeRead; + + private VectorDeserializePartitionContext(PartitionDesc partDesc) { + super(partDesc); + } + + public VectorDeserializeRow getVectorDeserializeRow() { + return vectorDeserializeRow; + } + + DeserializeRead getDeserializeRead() { + return deserializeRead; + } + + public void init(Configuration hconf) + throws SerDeException, HiveException { + VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); + + // This type information specifies the data types the partition needs to read. 
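+      // These are the partition's own column types; any mismatch with the table's
+      // tableRowTypeInfos is reconciled through the conversionFlags handed to
+      // vectorDeserializeRow.init() at the end of this method.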
+ TypeInfo[] dataTypeInfos = vectorPartDesc.getTypeInfos(); + + boolean[] conversionFlags = vectorPartDesc.getConversionFlags(); + + switch (vectorPartDesc.getVectorDeserializeType()) { + case LAZY_SIMPLE: + { + LazySerDeParameters simpleSerdeParams = + new LazySerDeParameters(hconf, partDesc.getTableDesc().getProperties(), + LazySimpleSerDe.class.getName()); + + // The LazySimple deserialization will fill in the type we want for the table. + // Hence, we pass tableRowPrimitiveTypeInfos. + LazySimpleDeserializeRead lazySimpleDeserializeRead = + new LazySimpleDeserializeRead(dataTypeInfos, simpleSerdeParams); + + vectorDeserializeRow = + new VectorDeserializeRow(lazySimpleDeserializeRead); + + deserializeRead = lazySimpleDeserializeRead; + } + break; + + case LAZY_BINARY: + { + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + new LazyBinaryDeserializeRead(dataTypeInfos); + + vectorDeserializeRow = + new VectorDeserializeRow(lazyBinaryDeserializeRead); + + deserializeRead = lazyBinaryDeserializeRead; + } + break; + + default: + throw new RuntimeException( + "Unexpected vector deserialize row type " + vectorPartDesc.getVectorDeserializeType().name()); + } + + // Initialize with data type conversion parameters. + vectorDeserializeRow.init(tableRowTypeInfos, conversionFlags, + columnsToIncludeTruncated); + } + } + + protected class RowDeserializePartitionContext extends PartitionContext { + + private Deserializer partDeserializer; + private StructObjectInspector partRawRowObjectInspector; + private VectorAssignRowSameBatch vectorAssign; + + private RowDeserializePartitionContext(PartitionDesc partDesc) { + super(partDesc); + } + + public Deserializer getPartDeserializer() { + return partDeserializer; + } + + public StructObjectInspector getPartRawRowObjectInspector() { + return partRawRowObjectInspector; + } + + public VectorAssignRowSameBatch getVectorAssign() { + return vectorAssign; + } + + public void init(Configuration hconf) + throws Exception { + VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); + + partDeserializer = partDesc.getDeserializer(hconf); + + partRawRowObjectInspector = + (StructObjectInspector) partDeserializer.getObjectInspector(); + + boolean[] conversionFlags= vectorPartDesc.getConversionFlags(); + + TypeInfo[] dataTypeInfos = vectorPartDesc.getTypeInfos(); + + vectorAssign = new VectorAssignRowSameBatch(); + + // Initialize with data type conversion parameters. 
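+      // The assigner is given the partition's source types, the table's target types,
+      // the per-column conversion flags, and the truncated column include mask, so each
+      // deserialized field lands in the correct vector column with any needed promotion.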
+ vectorAssign.init(dataTypeInfos, tableRowTypeInfos, conversionFlags, + columnsToIncludeTruncated); + + vectorAssign.setOneBatch(batch); + } + } + + public PartitionContext CreateAndInitPartitionContext(PartitionDesc partDesc, + Configuration hconf) + throws SerDeException, Exception { + + VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); + if (vectorPartDesc == null) { + LOG.info("debug"); + } + PartitionContext partitionContext; + VectorMapOperatorReadType vectorMapOperatorReadType = + vectorPartDesc.getVectorMapOperatorReadType(); + switch (vectorMapOperatorReadType) { + case VECTORIZED_INPUT_FILE_FORMAT: + partitionContext = new VectorizedInputFileFormatPartitionContext(partDesc); + break; + + case VECTOR_DESERIALIZE: + partitionContext = new VectorDeserializePartitionContext(partDesc); + break; + + case ROW_DESERIALIZE: + partitionContext = new RowDeserializePartitionContext(partDesc); + break; + + default: + throw new RuntimeException("Unexpected vector MapOperator read type " + + vectorMapOperatorReadType.name()); + } + + partitionContext.init(hconf); + + return partitionContext; + } + + private void determineColumnsToInclude(Configuration hconf) { + + columnsToIncludeTruncated = null; + + List columnsToIncludeTruncatedList = ColumnProjectionUtils.getReadColumnIDs(hconf); + if (columnsToIncludeTruncatedList != null && + columnsToIncludeTruncatedList.size() > 0 && columnsToIncludeTruncatedList.size() < dataColumnCount ) { + + // Partitioned columns will not be in the include list. + + boolean[] columnsToInclude = new boolean[dataColumnCount]; + Arrays.fill(columnsToInclude, false); + for (int columnNum : columnsToIncludeTruncatedList) { + columnsToInclude[columnNum] = true; + } + + // Work backwards to find the highest wanted column. + + int highestWantedColumnNum = -1; + for (int i = dataColumnCount - 1; i >= 0; i--) { + if (columnsToInclude[i]) { + highestWantedColumnNum = i; + break; + } + } + if (highestWantedColumnNum == -1) { + throw new RuntimeException("No columns to include?"); + } + int newColumnCount = highestWantedColumnNum + 1; + if (newColumnCount == dataColumnCount) { + columnsToIncludeTruncated = columnsToInclude; + } else { + columnsToIncludeTruncated = Arrays.copyOf(columnsToInclude, newColumnCount); + } + } + } + /** Kryo ctor. */ protected VectorMapOperator() { super(); @@ -37,29 +328,336 @@ public VectorMapOperator(CompilationOpContext ctx) { super(ctx); } + // Create a file to VectorFileContext map. + // Where VectorFileContext describes how to process "rows" (could be VRBs). + // + @Override + public void setChildren(Configuration hconf) throws Exception { + + // Get the one TableScanOperator. + oneRootOperator = conf.getAliasToWork().values().iterator().next(); + + currentReadType = VectorMapOperatorReadType.NONE; + + determineColumnsToInclude(hconf); + + // UNDONE: Put this in the MapWork to make it available to Pass-Thru VectorizedInputFileFormat + // UNDONE: readers. 
+ // UNDONE: + batchContext = conf.getVectorizedRowBatchCtx(); + + batch = batchContext.createVectorizedRowBatch(columnsToIncludeTruncated); + conf.setVectorizedRowBatch(batch); + + dataColumnCount = batchContext.getDataColumnCount(); + partitionColumnCount = batchContext.getPartitionColumnCount(); + partitionValues = new Object[partitionColumnCount]; + + // Create table related objects + TypeInfo tableStructTypeInfo = TypeInfoFactory.getStructTypeInfo( + Arrays.asList(batchContext.getRowColumnNames()), + Arrays.asList(batchContext.getRowColumnTypeInfos())); + tableStructObjectInspector = + (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + tableStructTypeInfo); + + tableRowTypeInfos = batchContext.getRowColumnTypeInfos(); + + // The Vectorizer class enforces that there is only one TableScanOperator, so + // we don't need the more complicated multiple root operator mapping that MapOperator has. + + fileToPartitionContextMap = new HashMap(); + + // Temporary map so we only create one partition context entry. + HashMap partitionContextMap = + new HashMap(); + + for (Map.Entry> entry : conf.getPathToAliases().entrySet()) { + String path = entry.getKey(); + PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path); + ArrayList aliases = entry.getValue(); + + VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); + if (vectorPartDesc == null) { + LOG.info("Ignoring path " + path + " with aliases " + aliases + " since it does not have vector partition descriptor? Did not get examined by the Vectorizer class"); + continue; // UNDONE: Need to understand what the new PartitionDesc is about... + } + LOG.info("VectorMapOperator path: " + path + ", read type " + vectorPartDesc.getVectorMapOperatorReadType().name() + + ", vector deserialize type " + vectorPartDesc.getVectorDeserializeType().name() + ", aliases " + aliases); + + PartitionContext partitionContext; + if (!partitionContextMap.containsKey(partDesc)) { + partitionContext = CreateAndInitPartitionContext(partDesc, hconf); + partitionContextMap.put(partDesc, partitionContext); + } else { + partitionContext = partitionContextMap.get(partDesc); + } + + fileToPartitionContextMap.put(path, partitionContext); + } + + // Create list of one. 
+ List> children = + new ArrayList>(); + children.add(oneRootOperator); + + setChildOperators(children); + } + + @Override + public void initializeMapOperator(Configuration hconf) throws HiveException { + super.initializeMapOperator(hconf); + + oneRootOperator.initialize(hconf, new ObjectInspector[] {tableStructObjectInspector}); + } + + public void initializeContexts() throws HiveException { + Path fpath = getExecContext().getCurrentInputPath(); + String nominalPath = getNominalPath(fpath); + setupPartitionContextVars(nominalPath); + } + + // Find context for current input file + @Override + public void cleanUpInputFileChangedOp() throws HiveException { + super.cleanUpInputFileChangedOp(); + Path fpath = getExecContext().getCurrentInputPath(); + String nominalPath = getNominalPath(fpath); + + setupPartitionContextVars(nominalPath); + + // Add alias, table name, and partitions to hadoop conf so that their + // children will inherit these + oneRootOperator.setInputContext(nominalPath, currentPartContext.tableName, + currentPartContext.partName); + } + + private void setupPartitionContextVars(String nominalPath) throws HiveException { + + if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) { + + // Our global batch was passed through by the process method for the previous file and + // now needs to be reset before reuse. We assume cleanUpInputFileChangedOp is called before + // the next split's reader is set up. + + batch.reset(); + } + + currentPartContext = fileToPartitionContextMap.get(nominalPath); + PartitionDesc partDesc = currentPartContext.getPartDesc(); + VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); + currentReadType = vectorPartDesc.getVectorMapOperatorReadType(); + + if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) { + + // We will get a pass-thru VectorizedRowBatch as a row from the reader. + + // The reader is responsible for setting the partition columns. + + currentDataColumnCount = 0; + + currentDeserializeRead = null; + currentVectorDeserializeRow = null; + + currentPartDeserializer = null; + currentPartRawRowObjectInspector = null; + currentVectorAssign = null; + + } else { + + // We will get an un-deserialized row from the reader. + + if (batch.size > 0) { + + // Clear out any rows in the batch from previous partition since we are going to change + // the repeating partition column values. + + oneRootOperator.process(batch, 0); + if (oneRootOperator.getDone()) { + setDone(true); + } + batch.reset(); + } + + currentDataColumnCount = vectorPartDesc.getNonPartColumnCount(); + + if (currentDataColumnCount < dataColumnCount) { + + // Default default any additional data columns to NULL once for the file. + + for (int i = currentDataColumnCount; i < dataColumnCount; i++) { + ColumnVector colVector = batch.cols[i]; + colVector.isNull[0] = true; + colVector.noNulls = false; + colVector.isRepeating = true; + } + } + + if (batchContext.getPartitionColumnCount() > 0) { + + // The partition columns are set once for the partition and are marked repeating. 
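+        // A repeating column vector (isRepeating == true) holds its value at index 0 and
+        // applies it to every row in the batch, so a partition value only has to be
+        // written once per partition rather than once per row.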
+ + VectorizedRowBatchCtx.getPartitionValues(batchContext, partDesc, partitionValues); + batchContext.addPartitionColsToBatch(batch, partitionValues); + } + + switch (currentReadType) { + case VECTOR_DESERIALIZE: + { + VectorDeserializePartitionContext vectorDeserPartContext = + (VectorDeserializePartitionContext) currentPartContext; + + currentDeserializeRead = vectorDeserPartContext.getDeserializeRead(); + currentVectorDeserializeRow = vectorDeserPartContext.getVectorDeserializeRow(); + + currentPartDeserializer = null; + currentPartRawRowObjectInspector = null; + currentVectorAssign = null; + + } + break; + + case ROW_DESERIALIZE: + { + RowDeserializePartitionContext rowDeserPartContext = + (RowDeserializePartitionContext) currentPartContext; + + currentDeserializeRead = null; + currentVectorDeserializeRow = null; + + currentPartDeserializer = rowDeserPartContext.getPartDeserializer(); + currentPartRawRowObjectInspector = rowDeserPartContext.getPartRawRowObjectInspector(); + currentVectorAssign = rowDeserPartContext.getVectorAssign(); + } + break; + + default: + throw new RuntimeException("Unexpected vector MapOperator read type " + + currentReadType.name()); + } + } + } + + @Override + public Deserializer getCurrentDeserializer() { + // Not applicable. + return null; + } + @Override public void process(Writable value) throws HiveException { + // A mapper can span multiple files/partitions. - // The serializers need to be reset if the input file changed + // The PartitionContext need to be changed if the input file changed ExecMapperContext context = getExecContext(); if (context != null && context.inputFileChanged()) { // The child operators cleanup if input file has changed cleanUpInputFileChanged(); } - // The row has been converted to comply with table schema, irrespective of partition schema. - // So, use tblOI (and not partOI) for forwarding - try { - int childrenDone = 0; - for (MapOpCtx current : currentCtxs) { - if (!current.forward(value)) { - childrenDone++; + if (!oneRootOperator.getDone()) { + try { + if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) { + + // We pass-true VectorizedRowBatch as a row. + + oneRootOperator.process(value, 0); + if (oneRootOperator.getDone()) { + setDone(true); + return; + } + + } else { + + // We have an un-deserialized row from the reader. + + if (batch.size == batch.DEFAULT_SIZE) { + + // Feed full batch to operator tree. + oneRootOperator.process(batch, 0); + if (oneRootOperator.getDone()) { + setDone(true); + return; + } + + /** + * Only reset the current data columns. Not any data columns defaulted to NULL + * because they are not present in the partition, and not partition columns. 
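+               * Those NULL-defaulted and partition columns were configured as repeating
+               * values when the current file/partition was set up, and resetting them here
+               * would throw that state away for the remainder of the split.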
+ */ + for (int c = 0; c < currentDataColumnCount; c++) { + batch.cols[c].reset(); + batch.cols[c].init(); + } + batch.selectedInUse = false; + batch.size = 0; + batch.endOfFile = false; + } + + switch (currentReadType) { + case VECTOR_DESERIALIZE: + { + BinaryComparable binComp = (BinaryComparable) value; + currentDeserializeRead.set(binComp.getBytes(), 0, binComp.getLength()); + + currentVectorDeserializeRow.deserializeByValue(batch, batch.size); + } + break; + + case ROW_DESERIALIZE: + { + Object deserialized = currentPartDeserializer.deserialize(value); + currentVectorAssign.assignRow(batch.size, deserialized, + currentPartRawRowObjectInspector); + } + break; + + default: + throw new RuntimeException("Unexpected vector MapOperator read type " + + currentReadType.name()); + } + batch.size++; } + } catch (Exception e) { + throw new HiveException("Hive Runtime Error while processing row ", e); } + } + } + + @Override + public void process(Object row, int tag) throws HiveException { + throw new HiveException("Hive 2 Internal error: should not be called!"); + } - rowsForwarded(childrenDone, ((VectorizedRowBatch)value).size); - } catch (Exception e) { - throw new HiveException("Hive Runtime Error while processing row ", e); + @Override + public void closeOp(boolean abort) throws HiveException { + if (!abort && oneRootOperator != null && !oneRootOperator.getDone() && + currentReadType != VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) { + if (batch.size > 0) { + oneRootOperator.process(batch, 0); + } } + super.closeOp(abort); + } + + @Override + public String getName() { + return getOperatorName(); + } + + static public String getOperatorName() { + return "MAP"; + } + + @Override + public OperatorType getType() { + return null; + } + + @Override + public void initEmptyInputChildren(List> children, + Configuration hconf) throws SerDeException, Exception { + // UNDONE ???? + // TODO Auto-generated method stub + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 14a6a72..082bd29 100755 --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -206,7 +206,7 @@ public void configure(JobConf job) { return inputFormat; // LLAP not enabled, no-op. 
} boolean isSupported = inputFormat instanceof LlapWrappableInputFormatInterface, - isVector = Utilities.isVectorMode(conf); + isVector = Utilities.getUseVectorizedInputFileFormat(conf); if (!isSupported || !isVector) { LOG.info("Not using llap for " + inputFormat + ": " + isSupported + ", " + isVector); return inputFormat; @@ -229,7 +229,7 @@ public static boolean isLlapEnabled(Configuration conf) { } public static boolean canWrapAnyForLlap(Configuration conf, MapWork mapWork) { - return Utilities.isVectorMode(conf, mapWork); + return Utilities.getUseVectorizedInputFileFormat(conf, mapWork); } public static boolean canWrapForLlap(Class inputFormatClass) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index f36f707..066167c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -486,7 +486,7 @@ public boolean validateInput(FileSystem fs, HiveConf conf, List files ) throws IOException { - if (Utilities.isVectorMode(conf)) { + if (Utilities.getUseVectorizedInputFileFormat(conf)) { return new VectorizedOrcInputFormat().validateInput(fs, conf, files); } @@ -1458,7 +1458,7 @@ private static void scheduleSplits(ETLSplitStrategy splitStrategy, Context conte public org.apache.hadoop.mapred.RecordReader getRecordReader(InputSplit inputSplit, JobConf conf, Reporter reporter) throws IOException { - boolean vectorMode = Utilities.isVectorMode(conf); + boolean vectorMode = Utilities.getUseVectorizedInputFileFormat(conf); boolean isAcidRead = isAcidRead(conf, inputSplit); if (!isAcidRead) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java index a4e35cb..5b65e5c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java @@ -59,7 +59,7 @@ protected MapredParquetInputFormat(final ParquetInputFormat input final org.apache.hadoop.mapred.Reporter reporter ) throws IOException { try { - if (Utilities.isVectorMode(job)) { + if (Utilities.getUseVectorizedInputFileFormat(job)) { if (LOG.isDebugEnabled()) { LOG.debug("Using vectorized record reader"); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index ee080aa..0d983d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -113,6 +113,7 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; @@ -157,6 +158,8 @@ import org.apache.hadoop.hive.ql.udf.generic.*; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; @@ -166,6 +169,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.mapred.SequenceFileInputFormat; +import org.apache.hadoop.mapred.TextInputFormat; import com.google.common.base.Joiner; @@ -335,6 +340,7 @@ public Vectorizer() { List columnNames; List typeInfos; int partitionColumnCount; + boolean useVectorizedInputFileFormat; String[] scratchTypeNameArray; @@ -356,7 +362,9 @@ public void setPartitionColumnCount(int partitionColumnCount) { public void setScratchTypeNameArray(String[] scratchTypeNameArray) { this.scratchTypeNameArray = scratchTypeNameArray; } - + public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { + this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + } public void setNonVectorizedOps(Set> nonVectorizedOps) { this.nonVectorizedOps = nonVectorizedOps; } @@ -377,6 +385,8 @@ public void transferToBaseWork(BaseWork baseWork) { partitionColumnCount, scratchTypeNameArray); baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx); + + baseWork.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); } } @@ -493,24 +503,88 @@ private String getColumns(List columnNames, int start, int length, return Joiner.on(separator).join(columnNames.subList(start, start + length)); } - private String getTypes(List typeInfos, int start, int length) { - return TypeInfoUtils.getTypesString(typeInfos.subList(start, start + length)); - } + private boolean verifyAndSetVectorPartDesc(PartitionDesc pd, boolean isAcidTable, + boolean useVectorizedInputFileFormat, boolean useVectorDeserialize, + boolean useRowDeserialize) { - private boolean verifyAndSetVectorPartDesc(PartitionDesc pd) { + String inputFileFormatClassName = pd.getInputFileFormatClassName(); // Look for Pass-Thru case where InputFileFormat has VectorizedInputFormatInterface // and reads VectorizedRowBatch as a "row". - if (Utilities.isInputFileFormatVectorized(pd)) { + if (useVectorizedInputFileFormat) { + if (Utilities.isInputFileFormatVectorized(pd)) { - pd.setVectorPartitionDesc(VectorPartitionDesc.createVectorizedInputFileFormat()); + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorizedInputFileFormat(inputFileFormatClassName)); + + return true; + } + } + + /** + * When the table is ACID, then we can only vectorized with the input file format... + */ + if (isAcidTable) { + + LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized"); + + return false; + } + + String deserializerClassName = pd.getDeserializerClassName(); + + // Look for InputFileFormat / Serde combinations we can deserialize more efficiently + // using VectorDeserializeRow and a deserialize class with the DeserializeRead interface. + // + // Do the "vectorized" row-by-row deserialization into a VectorizedRowBatch in the + // VectorMapOperator. 
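+    // The overall strategy, in order of preference, is:
+    //   1. VECTORIZED_INPUT_FILE_FORMAT - the input format already produces VectorizedRowBatch,
+    //      so batches are passed straight through.
+    //   2. VECTOR_DESERIALIZE - TextInputFormat + LazySimpleSerDe (LAZY_SIMPLE) or
+    //      SequenceFileInputFormat + LazyBinarySerDe (LAZY_BINARY), deserialized directly
+    //      into vector columns with VectorDeserializeRow.
+    //   3. ROW_DESERIALIZE - any other combination, deserialized with the regular Serde and
+    //      assigned into the batch row by row.
+    // ACID tables are only vectorized through option 1 (handled above).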
+ + if (useVectorDeserialize) { + + // Currently, we support LazySimple deserialization: + // + // org.apache.hadoop.mapred.TextInputFormat + // org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + // + // AND + // + // org.apache.hadoop.mapred.SequenceFileInputFormat + // org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + if (inputFileFormatClassName.equals(TextInputFormat.class.getName()) && + deserializerClassName.equals(LazySimpleSerDe.class.getName())) { + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorDeserialize( + inputFileFormatClassName, VectorDeserializeType.LAZY_SIMPLE)); + + return true; + } else if (inputFileFormatClassName.equals(SequenceFileInputFormat.class.getName()) && + deserializerClassName.equals(LazyBinarySerDe.class.getName())) { + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createVectorDeserialize( + inputFileFormatClassName, VectorDeserializeType.LAZY_BINARY)); + + return true; + } + } + + // Otherwise, if enabled, deserialize rows using regular Serde and add the object + // inspect-able Object[] row to a VectorizedRowBatch in the VectorMapOperator. + + if (useRowDeserialize) { + + pd.setVectorPartitionDesc( + VectorPartitionDesc.createRowDeserialize( + inputFileFormatClassName, deserializerClassName)); return true; + } - LOG.info("Input format: " + pd.getInputFileFormatClassName() - + ", doesn't provide vectorized input"); + LOG.info("Input format: " + inputFileFormatClassName + " cannot be vectorized"); return false; } @@ -518,6 +592,18 @@ private boolean verifyAndSetVectorPartDesc(PartitionDesc pd) { private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String alias, TableScanOperator tableScanOperator, VectorTaskColumnInfo vectorTaskColumnInfo) { + boolean isAcidTable = tableScanOperator.getConf().isAcidTable(); + + boolean useVectorizedInputFileFormat = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT); + boolean useVectorDeserialize = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE); + boolean useRowDeserialize = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_USE_ROW_DESERIALIZE); + // These names/types are the data columns plus partition columns. final List allColumnNameList = new ArrayList(); final List allTypeInfoList = new ArrayList(); @@ -557,12 +643,13 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al // We seen this already. 
continue; } - if (!verifyAndSetVectorPartDesc(partDesc)) { + if (!verifyAndSetVectorPartDesc(partDesc, isAcidTable, useVectorizedInputFileFormat, + useVectorDeserialize, useRowDeserialize)) { return false; } VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc(); - LOG.info("Vectorizer path: " + path + ", read type " + - vectorPartDesc.getVectorMapOperatorReadType().name() + ", aliases " + aliases); + LOG.info("Vectorizer path: " + path + ", " + vectorPartDesc.toString() + + ", aliases " + aliases); Properties partProps = partDesc.getProperties(); @@ -671,6 +758,7 @@ private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String al vectorTaskColumnInfo.setColumnNames(allColumnNameList); vectorTaskColumnInfo.setTypeInfos(allTypeInfoList); vectorTaskColumnInfo.setPartitionColumnCount(partitionColumnCount); + vectorTaskColumnInfo.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat); return true; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index 5914b1c..601078b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -68,6 +68,8 @@ public BaseWork(String name) { protected VectorizedRowBatchCtx vectorizedRowBatchCtx; + protected boolean useVectorizedInputFileFormat; + protected boolean llapMode = false; protected boolean uberMode = false; @@ -166,6 +168,14 @@ public void setVectorizedRowBatchCtx(VectorizedRowBatchCtx vectorizedRowBatchCtx this.vectorizedRowBatchCtx = vectorizedRowBatchCtx; } + public void setUseVectorizedInputFileFormat(boolean useVectorizedInputFileFormat) { + this.useVectorizedInputFileFormat = useVectorizedInputFileFormat; + } + + public boolean getUseVectorizedInputFileFormat() { + return useVectorizedInputFileFormat; + } + // ----------------------------------------------------------------------------------------------- /** diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 3ef50fc..f038e1b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; @@ -131,6 +132,8 @@ private boolean doSplitsGrouping = true; + private VectorizedRowBatch vectorizedRowBatch; + // bitsets can't be correctly serialized by Kryo's default serializer // BitSet::wordsInUse is transient, so force dumping into a lower form private byte[] includedBuckets; @@ -643,4 +646,12 @@ public void setIncludedBuckets(BitSet includedBuckets) { // see comment next to the field this.includedBuckets = includedBuckets.toByteArray(); } + + public void setVectorizedRowBatch(VectorizedRowBatch vectorizedRowBatch) { + this.vectorizedRowBatch = vectorizedRowBatch; + } + + public VectorizedRowBatch getVectorizedRowBatch() { + return vectorizedRowBatch; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java index 8fe298d..0e3d82c 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java @@ -117,7 +117,6 @@ public void validateConversion(List fromTypeInfoList, // The method validateOne will turn this off when invalid conversion is found. validConversion = true; - boolean atLeastOneConversion = false; for (int i = 0; i < columnCount; i++) { TypeInfo fromTypeInfo = fromTypeInfoList.get(i); TypeInfo toTypeInfo = toTypeInfoList.get(i); @@ -127,12 +126,6 @@ public void validateConversion(List fromTypeInfoList, return; } } - - if (atLeastOneConversion) { - // Leave resultConversionFlags set. - } else { - resultConversionFlags = null; - } } public void validateConversion(TypeInfo[] fromTypeInfos, TypeInfo[] toTypeInfos) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java index 45151f2..7004577 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java @@ -39,44 +39,137 @@ // No data type conversion check? Assume ALTER TABLE prevented conversions that // VectorizedInputFileFormat cannot handle... // + // VECTOR_DESERIALIZE: + // LAZY_SIMPLE: + // Capable of converting on its own. + // LAZY_BINARY + // Partition schema assumed to match file contents. + // Conversion necessary from partition field values to vector columns. + // ROW_DESERIALIZE + // Partition schema assumed to match file contents. + // Conversion necessary from partition field values to vector columns. + // public static enum VectorMapOperatorReadType { NONE, - VECTORIZED_INPUT_FILE_FORMAT + VECTORIZED_INPUT_FILE_FORMAT, + VECTOR_DESERIALIZE, + ROW_DESERIALIZE } + public static enum VectorDeserializeType { + NONE, + LAZY_SIMPLE, + LAZY_BINARY + } private final VectorMapOperatorReadType vectorMapOperatorReadType; + private final VectorDeserializeType vectorDeserializeType; private final boolean needsDataTypeConversionCheck; + private final String rowDeserializerClassName; + private final String inputFileFormatClassName; + private boolean[] conversionFlags; private TypeInfo[] typeInfos; - private VectorPartitionDesc(VectorMapOperatorReadType vectorMapOperatorReadType, - boolean needsDataTypeConversionCheck) { + private VectorPartitionDesc(String inputFileFormatClassName, + VectorMapOperatorReadType vectorMapOperatorReadType, boolean needsDataTypeConversionCheck) { this.vectorMapOperatorReadType = vectorMapOperatorReadType; + this.vectorDeserializeType = VectorDeserializeType.NONE; this.needsDataTypeConversionCheck = needsDataTypeConversionCheck; + this.inputFileFormatClassName = inputFileFormatClassName; + rowDeserializerClassName = null; + conversionFlags = null; + typeInfos = null; + } + /** + * Create a VECTOR_DESERIALIZE flavor object. 
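+   * The read type is implicitly VECTOR_DESERIALIZE; vectorDeserializeType selects the
+   * LAZY_SIMPLE or LAZY_BINARY deserialization path.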
+ * @param vectorMapOperatorReadType + * @param vectorDeserializeType + * @param needsDataTypeConversionCheck + */ + private VectorPartitionDesc(String inputFileFormatClassName, + VectorDeserializeType vectorDeserializeType, boolean needsDataTypeConversionCheck) { + this.vectorMapOperatorReadType = VectorMapOperatorReadType.VECTOR_DESERIALIZE; + this.vectorDeserializeType = vectorDeserializeType; + this.needsDataTypeConversionCheck = needsDataTypeConversionCheck; + this.inputFileFormatClassName = inputFileFormatClassName; + rowDeserializerClassName = null; + conversionFlags = null; + typeInfos = null; + } + + /** + * Create a ROW_DESERIALIZE flavor object. + * @param rowDeserializerClassName + * @param inputFileFormatClassName + */ + private VectorPartitionDesc(String inputFileFormatClassName, String rowDeserializerClassName) { + this.vectorMapOperatorReadType = VectorMapOperatorReadType.ROW_DESERIALIZE; + this.vectorDeserializeType = VectorDeserializeType.NONE; + this.needsDataTypeConversionCheck = true; + this.inputFileFormatClassName = inputFileFormatClassName; + this.rowDeserializerClassName = rowDeserializerClassName; conversionFlags = null; typeInfos = null; } - public static VectorPartitionDesc createVectorizedInputFileFormat() { - return new VectorPartitionDesc(VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT, true); + public static VectorPartitionDesc createVectorizedInputFileFormat(String inputFileFormatClassName) { + return new VectorPartitionDesc(inputFileFormatClassName, + VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT, true); } + public static VectorPartitionDesc createVectorDeserialize(String inputFileFormatClassName, + VectorDeserializeType vectorDeserializeType) { + boolean needsDataTypeConversionCheck; + switch (vectorDeserializeType) { + case LAZY_SIMPLE: + needsDataTypeConversionCheck = false; + break; + case LAZY_BINARY: + needsDataTypeConversionCheck = true; + break; + default: + throw new RuntimeException("Unexpected vector deserialize type " + + vectorDeserializeType.name()); + } + return new VectorPartitionDesc(inputFileFormatClassName, vectorDeserializeType, + needsDataTypeConversionCheck); + } + + public static VectorPartitionDesc createRowDeserialize(String inputFileFormatClassName, + String rowDeserializerClassName) { + return new VectorPartitionDesc(rowDeserializerClassName, inputFileFormatClassName); + } @Override public VectorPartitionDesc clone() { - VectorPartitionDesc result = - new VectorPartitionDesc(vectorMapOperatorReadType, - needsDataTypeConversionCheck); + VectorPartitionDesc result; + switch (vectorMapOperatorReadType) { + case VECTORIZED_INPUT_FILE_FORMAT: + result = new VectorPartitionDesc(inputFileFormatClassName, vectorMapOperatorReadType, + needsDataTypeConversionCheck); + break; + case VECTOR_DESERIALIZE: + result = new VectorPartitionDesc(inputFileFormatClassName, vectorDeserializeType, + needsDataTypeConversionCheck); + break; + case ROW_DESERIALIZE: + result = new VectorPartitionDesc(inputFileFormatClassName, rowDeserializerClassName); + break; + default: + throw new RuntimeException("Unexpected vector map operator read type " + vectorMapOperatorReadType.name()); + } + result.conversionFlags = (conversionFlags == null ? 
null : Arrays.copyOf(conversionFlags, conversionFlags.length)); result.typeInfos = Arrays.copyOf(typeInfos, typeInfos.length); + return result; } @@ -84,6 +177,17 @@ public VectorMapOperatorReadType getVectorMapOperatorReadType() { return vectorMapOperatorReadType; } + public String getInputFileFormatClassName() { + return inputFileFormatClassName; + } + + public VectorDeserializeType getVectorDeserializeType() { + return vectorDeserializeType; + } + + public String getRowDeserializerClassName() { + return rowDeserializerClassName; + } public boolean getNeedsDataTypeConversionCheck() { return needsDataTypeConversionCheck; } @@ -107,4 +211,28 @@ public void setTypeInfos(List typeInfoList) { public int getNonPartColumnCount() { return typeInfos.length; } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("vector map operator read type "); + sb.append(vectorMapOperatorReadType.name()); + sb.append(", input file format class name "); + sb.append(inputFileFormatClassName); + switch (vectorMapOperatorReadType) { + case VECTORIZED_INPUT_FILE_FORMAT: + break; + case VECTOR_DESERIALIZE: + sb.append(", deserialize type "); + sb.append(vectorDeserializeType.name()); + break; + case ROW_DESERIALIZE: + sb.append(", deserializer class name "); + sb.append(rowDeserializerClassName); + break; + default: + throw new RuntimeException("Unexpected vector map operator read type " + vectorMapOperatorReadType.name()); + } + return sb.toString(); + } } diff --git ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q index fc935d5..b0e57fb 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q @@ -4,6 +4,9 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; SET hive.exec.schema.evolution=false; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q index e49a0f3..ca6822c 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q @@ -3,6 +3,9 @@ set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; SET hive.exec.schema.evolution=false; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q index 30b19bb..da726c5 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q @@ -2,6 +2,9 @@ set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; SET 
hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=more; set hive.exec.dynamic.partition.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q index 6df2095..393967f 100644 --- ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q +++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q @@ -1,8 +1,9 @@ set hive.cli.print.header=true; -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; - +SET hive.exec.schema.evolution=true; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=true; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=false; set hive.fetch.task.conversion=none; set hive.exec.dynamic.partition.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q deleted file mode 100644 index 44f7264..0000000 --- ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q +++ /dev/null @@ -1,56 +0,0 @@ -set hive.cli.print.header=true; -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=false; -set hive.fetch.task.conversion=none; -set hive.exec.dynamic.partition.mode=nonstrict; - - --- SORT_QUERY_RESULTS --- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table --- --- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT ---- -CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; - -insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); - --- Table-Non-Cascade ADD COLUMNS ... -alter table table1 add columns(c int, d string); - -insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); - -insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); - --- SELECT permutation columns to make sure NULL defaulting works right -select a,b from table1; -select a,b,c from table1; -select a,b,c,d from table1; -select a,c,d from table1; -select a,d from table1; -select c from table1; -select d from table1; - --- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT --- smallint = (2-byte signed integer, from -32,768 to 32,767) --- -CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; - -insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - --- Table-Non-Cascade CHANGE COLUMNS ... 
-alter table table2 change column a a int; - -insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -insert into table table2 values(5000, 'new'),(90000, 'new'); - -select a,b from table2; - - -DROP TABLE table1; -DROP TABLE table2; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q deleted file mode 100644 index 44f7264..0000000 --- ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q +++ /dev/null @@ -1,56 +0,0 @@ -set hive.cli.print.header=true; -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=false; -set hive.fetch.task.conversion=none; -set hive.exec.dynamic.partition.mode=nonstrict; - - --- SORT_QUERY_RESULTS --- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table --- --- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT ---- -CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; - -insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); - --- Table-Non-Cascade ADD COLUMNS ... -alter table table1 add columns(c int, d string); - -insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); - -insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); - --- SELECT permutation columns to make sure NULL defaulting works right -select a,b from table1; -select a,b,c from table1; -select a,b,c,d from table1; -select a,c,d from table1; -select a,d from table1; -select c from table1; -select d from table1; - --- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT --- smallint = (2-byte signed integer, from -32,768 to 32,767) --- -CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; - -insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - --- Table-Non-Cascade CHANGE COLUMNS ... -alter table table2 change column a a int; - -insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -insert into table table2 values(5000, 'new'),(90000, 'new'); - -select a,b from table2; - - -DROP TABLE table1; -DROP TABLE table2; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q index 4d78642..e54bdec 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q +++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q @@ -8,7 +8,7 @@ set hive.exec.dynamic.partition.mode=nonstrict; -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q index 0834351..feb7d67 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q +++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q @@ -1,67 +1,54 @@ set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.enabled=false; set hive.fetch.task.conversion=more; +set hive.exec.dynamic.partition.mode=nonstrict; + -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); -select a,b from table1; - --- ADD COLUMNS +-- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string); insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); -select a,b,c,d from table1; - --- ADD COLUMNS -alter table table1 add columns(e string); - -insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2'); - -select a,b,c,d,e from table1; +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); +-- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE; - -insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - -select a,b from table3; - --- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int; - -insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -select a,b from table3; - --- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string); +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; -insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6'); +insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); -select a,b from table3; +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int; +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); --- ADD COLUMNS ... 
RESTRICT -alter table table3 change column a a int; +insert into table table2 values(5000, 'new'),(90000, 'new'); -select a,b from table3; +select a,b from table2; DROP TABLE table1; DROP TABLE table2; -DROP TABLE table3; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_part.q index 173e417..d314906 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_part.q +++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_part.q @@ -8,7 +8,7 @@ set hive.exec.dynamic.partition.mode=nonstrict; -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_table.q index 83cab14..363b43c 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_table.q +++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_table.q @@ -1,67 +1,53 @@ set hive.cli.print.header=true; -SET hive.exec.schema.evolution=true; -SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.enabled=false; set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); -select a,b from table1; - --- ADD COLUMNS +-- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string); insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); -select a,b,c,d from table1; - --- ADD COLUMNS -alter table table1 add columns(e string); - -insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2'); - -select a,b,c,d,e from table1; +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); +-- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE; - -insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); - -select a,b from table3; - --- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int; - -insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); - -select a,b from table3; - --- ADD COLUMNS ... 
RESTRICT -alter table table3 add columns(e string); +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; -insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6'); +insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); -select a,b from table3; +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int; +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); --- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int; +insert into table table2 values(5000, 'new'),(90000, 'new'); -select a,b from table3; +select a,b from table2; DROP TABLE table1; -DROP TABLE table2; -DROP TABLE table3; \ No newline at end of file +DROP TABLE table2; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_part.q new file mode 100644 index 0000000..5e75c47 --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_part.q @@ -0,0 +1,106 @@ +set hive.mapred.mode=nonstrict; +set hive.cli.print.header=true; +SET hive.exec.schema.evolution=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=false; +SET hive.vectorized.use.vector.serde.deserialize=true; +SET hive.vectorized.use.row.serde.deserialize=false; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; +-- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string); + +insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1; + +select part,a,b from partitioned1; +select part,a,b,c from partitioned1; +select part,a,b,c,d from partitioned1; +select part,a,c,d from partitioned1; +select part,a,d from partitioned1; +select part,c from partitioned1; +select part,d from partitioned1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int; + +insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new'); + +select part,a,b from partitioned2; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string); + +insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1); + +-- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1; +select part,a,b,c from partitioned1; +select part,a,b,c,d from partitioned1; +select part,a,c,d from partitioned1; +select part,a,d from partitioned1; +select part,c from partitioned1; +select part,d from partitioned1; + + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int; + +insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1); + +select part,a,b from partitioned4; + + +DROP TABLE partitioned1; +DROP TABLE partitioned2; +DROP TABLE partitioned3; +DROP TABLE partitioned4; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_table.q new file mode 100644 index 0000000..d09467a --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_mapwork_table.q @@ -0,0 +1,62 @@ +set hive.cli.print.header=true; +SET hive.exec.schema.evolution=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=false; +SET hive.vectorized.use.vector.serde.deserialize=true; +SET hive.vectorized.use.row.serde.deserialize=false; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; +-- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; + +insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... 
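
The new schema_evol_text_vec_* tests above, and the schema_evol_text_vecrow_* tests added later in this patch, exercise TEXTFILE reads under different combinations of the three hive.vectorized.use.* properties. The sketch below is a hedged illustration of the kind of selection those flags imply, not the Vectorizer's real decision code: the property names and the three read-type constants come from this patch, while chooseReadType, the Properties-based configuration handling, and the inputFormatSupportsBatches parameter are hypothetical, and the real logic also consults input format and SerDe capabilities.

    import java.util.Properties;

    // Illustrative mapping from the three test flags to the three read types in this patch.
    public class ReadTypeSelectionSketch {

      enum VectorMapOperatorReadType {
        VECTORIZED_INPUT_FILE_FORMAT, VECTOR_DESERIALIZE, ROW_DESERIALIZE
      }

      static VectorMapOperatorReadType chooseReadType(Properties conf,
          boolean inputFormatSupportsBatches) {
        boolean useVectorizedInputFormat = Boolean.parseBoolean(
            conf.getProperty("hive.vectorized.use.vectorized.input.format", "true"));
        boolean useVectorDeserialize = Boolean.parseBoolean(
            conf.getProperty("hive.vectorized.use.vector.serde.deserialize", "true"));
        boolean useRowDeserialize = Boolean.parseBoolean(
            conf.getProperty("hive.vectorized.use.row.serde.deserialize", "true"));

        if (useVectorizedInputFormat && inputFormatSupportsBatches) {
          // The input format hands back vectorized batches directly (the ORC-style path).
          return VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT;
        } else if (useVectorDeserialize) {
          // The flag combination used by the schema_evol_text_vec_* tests above.
          return VectorMapOperatorReadType.VECTOR_DESERIALIZE;
        } else if (useRowDeserialize) {
          // The flag combination used by the schema_evol_text_vecrow_* tests later in this patch.
          return VectorMapOperatorReadType.ROW_DESERIALIZE;
        }
        throw new IllegalStateException("No vectorized read path enabled for this input");
      }

      public static void main(String[] args) {
        Properties conf = new Properties();
        conf.setProperty("hive.vectorized.use.vectorized.input.format", "false");
        conf.setProperty("hive.vectorized.use.vector.serde.deserialize", "true");
        conf.setProperty("hive.vectorized.use.row.serde.deserialize", "false");
        System.out.println(chooseReadType(conf, false)); // prints VECTOR_DESERIALIZE
      }
    }
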
+alter table table1 add columns(c int, d string); + +insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1; + +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; + +insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int; + +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table table2 values(5000, 'new'),(90000, 'new'); + +select a,b from table2; + + +DROP TABLE table1; +DROP TABLE table2; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_part.q new file mode 100644 index 0000000..5cb15fb --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_part.q @@ -0,0 +1,106 @@ +set hive.mapred.mode=nonstrict; +set hive.cli.print.header=true; +SET hive.exec.schema.evolution=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=false; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=true; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + +-- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string); + +insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1; + +select part,a,b from partitioned1; +select part,a,b,c from partitioned1; +select part,a,b,c,d from partitioned1; +select part,a,c,d from partitioned1; +select part,a,d from partitioned1; +select part,c from partitioned1; +select part,d from partitioned1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int; + +insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new'); + +select part,a,b from partitioned2; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string); + +insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1); + +-- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1; +select part,a,b,c from partitioned1; +select part,a,b,c,d from partitioned1; +select part,a,c,d from partitioned1; +select part,a,d from partitioned1; +select part,c from partitioned1; +select part,d from partitioned1; + + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int; + +insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1); + +select part,a,b from partitioned4; + + +DROP TABLE partitioned1; +DROP TABLE partitioned2; +DROP TABLE partitioned3; +DROP TABLE partitioned4; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_table.q new file mode 100644 index 0000000..1e6c849 --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_mapwork_table.q @@ -0,0 +1,63 @@ +set hive.cli.print.header=true; +SET hive.exec.schema.evolution=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.use.vectorized.input.format=false; +SET hive.vectorized.use.vector.serde.deserialize=false; +SET hive.vectorized.use.row.serde.deserialize=true; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + + +-- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; + +insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string); + +insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1; + +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; + +insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int; + +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table table2 values(5000, 'new'),(90000, 'new'); + +select a,b from table2; + + +DROP TABLE table1; +DROP TABLE table2; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/tez_schema_evolution.q ql/src/test/queries/clientpositive/tez_schema_evolution.q index d855cd2..1d8f487 100644 --- ql/src/test/queries/clientpositive/tez_schema_evolution.q +++ ql/src/test/queries/clientpositive/tez_schema_evolution.q @@ -1,3 +1,4 @@ +SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_part.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_part.q.out index 44ce24e..9c2e5ed 100644 --- ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_part.q.out +++ ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_part.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT @@ -11,7 +11,7 @@ PREHOOK: Output: database:default PREHOOK: Output: default@partitioned1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_table.q.out index 4003c20..e60066b 100644 --- ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_table.q.out +++ ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_table.q.out @@ -1,9 +1,9 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -11,10 +11,10 @@ PREHOOK: Output: database:default PREHOOK: Output: default@table1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -31,25 +31,12 @@ POSTHOOK: Output: default@table1 POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table1 -PREHOOK: type: QUERY -PREHOOK: Input: default@table1 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table1 -#### A masked pattern was here #### -a b -1 original -2 original -3 original -4 original -PREHOOK: query: -- ADD COLUMNS +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@table1 PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) POSTHOOK: type: ALTERTABLE_ADDCOLS POSTHOOK: Input: default@table1 @@ -67,6 +54,59 @@ POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2 POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] _col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 
original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 PREHOOK: query: select a,b,c,d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 @@ -84,185 +124,150 @@ a b c d 3 original NULL NULL 4 new 40 forty 4 original NULL NULL -PREHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY PREHOOK: Input: default@table1 -PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 -POSTHOOK: Output: default@table1 -PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 PREHOOK: type: QUERY -PREHOOK: Input: default@values__tmp__table__3 -PREHOOK: Output: default@table1 -POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 POSTHOOK: type: QUERY -POSTHOOK: Input: default@values__tmp__table__3 -POSTHOOK: Output: default@table1 -POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] -POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] -_col0 _col1 _col2 _col3 _col4 -PREHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 #### A masked pattern was here #### -POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: query: select d from table1 POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 #### A masked pattern was 
here #### -a b c d e -1 new 10 ten NULL -1 original NULL NULL NULL -2 new 20 twenty NULL -2 original NULL NULL NULL -3 new 30 thirty NULL -3 original NULL NULL NULL -4 new 40 forty NULL -4 original NULL NULL NULL -5 new 100 hundred another1 -6 new 200 two hundred another2 +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred PREHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@table3 +PREHOOK: Output: default@table2 POSTHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__4 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__4 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -3 original -4 original -6737 original -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__5 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__5 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -6737 original -72909 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__6 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__6 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -_col0 _col1 _col2 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -5000 new -6737 original -72909 new -90000 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int -PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 change column a a int -POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: select a,b from table3 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 PREHOOK: type: QUERY -PREHOOK: Input: default@table3 +PREHOOK: Input: default@table2 #### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 +POSTHOOK: query: select a,b from table2 POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 +POSTHOOK: Input: default@table2 #### A masked pattern was here #### a b 1000 original @@ -285,13 +290,9 @@ POSTHOOK: Input: default@table1 POSTHOOK: Output: default@table1 PREHOOK: query: DROP TABLE table2 PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 POSTHOOK: query: DROP TABLE table2 POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE table3 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: DROP TABLE table3 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_part.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_part.q.out index 44f5822..9c2e5ed 100644 --- ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_part.q.out +++ ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_part.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT @@ -11,7 +11,7 @@ PREHOOK: Output: database:default PREHOOK: Output: default@partitioned1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_table.q.out index 4003c20..0a2eabb 100644 --- ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_table.q.out +++ ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_table.q.out @@ -1,9 +1,9 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -11,10 +11,10 @@ PREHOOK: Output: database:default PREHOOK: Output: default@table1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -31,25 +31,12 @@ POSTHOOK: Output: default@table1 POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table1 -PREHOOK: type: QUERY -PREHOOK: Input: default@table1 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table1 -#### A masked pattern was here #### -a b -1 original -2 original -3 original -4 original -PREHOOK: query: -- ADD COLUMNS +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@table1 PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) POSTHOOK: type: ALTERTABLE_ADDCOLS POSTHOOK: Input: default@table1 @@ -67,6 +54,59 @@ POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2 POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] _col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 
original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 PREHOOK: query: select a,b,c,d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 @@ -84,185 +124,150 @@ a b c d 3 original NULL NULL 4 new 40 forty 4 original NULL NULL -PREHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY PREHOOK: Input: default@table1 -PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 -POSTHOOK: Output: default@table1 -PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 PREHOOK: type: QUERY -PREHOOK: Input: default@values__tmp__table__3 -PREHOOK: Output: default@table1 -POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 POSTHOOK: type: QUERY -POSTHOOK: Input: default@values__tmp__table__3 -POSTHOOK: Output: default@table1 -POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] -POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] -_col0 _col1 _col2 _col3 _col4 -PREHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 #### A masked pattern was here #### -POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: query: select d from table1 POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 #### A masked pattern was 
here #### -a b c d e -1 new 10 ten NULL -1 original NULL NULL NULL -2 new 20 twenty NULL -2 original NULL NULL NULL -3 new 30 thirty NULL -3 original NULL NULL NULL -4 new 40 forty NULL -4 original NULL NULL NULL -5 new 100 hundred another1 -6 new 200 two hundred another2 +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred PREHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@table3 +PREHOOK: Output: default@table2 POSTHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__4 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__4 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -3 original -4 original -6737 original -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__5 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__5 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -6737 original -72909 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__6 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__6 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -_col0 _col1 _col2 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -5000 new -6737 original -72909 new -90000 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int -PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 change column a a int -POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: select a,b from table3 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 PREHOOK: type: QUERY -PREHOOK: Input: default@table3 +PREHOOK: Input: default@table2 #### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 +POSTHOOK: query: select a,b from table2 POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 +POSTHOOK: Input: default@table2 #### A masked pattern was here #### a b 1000 original @@ -285,13 +290,9 @@ POSTHOOK: Input: default@table1 POSTHOOK: Output: default@table1 PREHOOK: query: DROP TABLE table2 PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 POSTHOOK: query: DROP TABLE table2 POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE table3 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: DROP TABLE table3 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_part.q.out ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_part.q.out new file mode 100644 index 0000000..b6305ef --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_part.q.out @@ -0,0 +1,687 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: partitioned1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 
NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c 
+1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_table.q.out new file mode 100644 index 0000000..9ef0915 --- /dev/null +++ 
ql/src/test/results/clientpositive/schema_evol_text_vec_mapwork_table.q.out @@ -0,0 +1,343 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: table1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A 
masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: 
default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_part.q.out ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_part.q.out new file mode 100644 index 0000000..79332d7 --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_part.q.out @@ -0,0 +1,685 @@ +PREHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: partitioned1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: part (type: int), a (type: int), b (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + 
table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 
+PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_table.q.out new file mode 100644 index 0000000..756bc48 --- /dev/null +++ 
ql/src/test/results/clientpositive/schema_evol_text_vecrow_mapwork_table.q.out @@ -0,0 +1,341 @@ +PREHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
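For context, the golden output above comes from a .q script whose settings preamble is not shown here; a plausible sketch of such a preamble for the row-serde variant (assumed for illustration, not taken from this patch) is:

set hive.fetch.task.conversion=none;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.use.vector.serde.deserialize=false;
set hive.vectorized.use.row.serde.deserialize=true;

Under settings along these lines the TEXTFILE reads are vectorized through the row SERDE path, which matches the "Execution mode: vectorized" entries in the EXPLAIN output that follows.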
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: table1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 134 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A 
masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: 
default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_part.q.out index 44ce24e..9c2e5ed 100644 --- ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_part.q.out +++ ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_part.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT @@ -11,7 +11,7 @@ PREHOOK: Output: database:default PREHOOK: Output: default@partitioned1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_table.q.out index 4003c20..e60066b 100644 --- ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_table.q.out +++ ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_table.q.out @@ -1,9 +1,9 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -11,10 +11,10 @@ PREHOOK: Output: database:default PREHOOK: Output: default@table1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -31,25 +31,12 @@ POSTHOOK: Output: default@table1 POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table1 -PREHOOK: type: QUERY -PREHOOK: Input: default@table1 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table1 -#### A masked pattern was here #### -a b -1 original -2 original -3 original -4 original -PREHOOK: query: -- ADD COLUMNS +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@table1 PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
alter table table1 add columns(c int, d string) POSTHOOK: type: ALTERTABLE_ADDCOLS POSTHOOK: Input: default@table1 @@ -67,6 +54,59 @@ POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2 POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] _col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 PREHOOK: query: select a,b,c,d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 @@ -84,185 +124,150 @@ a b c d 3 original NULL NULL 4 new 40 forty 4 original NULL NULL -PREHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY PREHOOK: Input: default@table1 -PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 -POSTHOOK: Output: default@table1 -PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 PREHOOK: 
type: QUERY -PREHOOK: Input: default@values__tmp__table__3 -PREHOOK: Output: default@table1 -POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 POSTHOOK: type: QUERY -POSTHOOK: Input: default@values__tmp__table__3 -POSTHOOK: Output: default@table1 -POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] -POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] -_col0 _col1 _col2 _col3 _col4 -PREHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 #### A masked pattern was here #### -POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: query: select d from table1 POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 #### A masked pattern was here #### -a b c d e -1 new 10 ten NULL -1 original NULL NULL NULL -2 new 20 twenty NULL -2 original NULL NULL NULL -3 new 30 thirty NULL -3 original NULL NULL NULL -4 new 40 forty NULL -4 original NULL NULL NULL -5 new 100 hundred another1 -6 new 200 two hundred another2 +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred PREHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@table3 +PREHOOK: Output: default@table2 POSTHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__4 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__4 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -3 original -4 original -6737 original -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__5 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__5 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -6737 original -72909 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__6 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__6 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -_col0 _col1 _col2 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -5000 new -6737 original -72909 new -90000 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int -PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 change column a a int -POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: select a,b from table3 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 PREHOOK: type: QUERY -PREHOOK: Input: default@table3 +PREHOOK: Input: default@table2 #### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 +POSTHOOK: query: select a,b from table2 POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 +POSTHOOK: Input: default@table2 #### A masked pattern was here #### a b 1000 original @@ -285,13 +290,9 @@ POSTHOOK: Input: default@table1 POSTHOOK: Output: default@table1 PREHOOK: query: DROP TABLE table2 PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 POSTHOOK: query: DROP TABLE table2 POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE table3 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: DROP TABLE table3 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_part.q.out index 44f5822..9c2e5ed 100644 --- ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_part.q.out +++ ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_part.q.out @@ -1,6 +1,6 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT @@ -11,7 +11,7 @@ PREHOOK: Output: database:default PREHOOK: Output: default@partitioned1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- FILE VARIATION: TEXTFILE, Non-Vectorized, FetchWork, Partitioned -- -- -- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_table.q.out index 4003c20..0a2eabb 100644 --- ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_table.q.out +++ ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_table.q.out @@ -1,9 +1,9 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -11,10 +11,10 @@ PREHOOK: Output: database:default PREHOOK: Output: default@table1 POSTHOOK: query: -- SORT_QUERY_RESULTS -- --- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table -- -- --- SECTION VARIATION: ALTER TABLE ADD COLUMNS +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT --- CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -31,25 +31,12 @@ POSTHOOK: Output: default@table1 POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table1 -PREHOOK: type: QUERY -PREHOOK: Input: default@table1 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table1 -#### A masked pattern was here #### -a b -1 original -2 original -3 original -4 original -PREHOOK: query: -- ADD COLUMNS +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@table1 PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... alter table table1 add columns(c int, d string) POSTHOOK: type: ALTERTABLE_ADDCOLS POSTHOOK: Input: default@table1 @@ -67,6 +54,59 @@ POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2 POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] _col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 
original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 PREHOOK: query: select a,b,c,d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 @@ -84,185 +124,150 @@ a b c d 3 original NULL NULL 4 new 40 forty 4 original NULL NULL -PREHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY PREHOOK: Input: default@table1 -PREHOOK: Output: default@table1 -POSTHOOK: query: -- ADD COLUMNS -alter table table1 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 -POSTHOOK: Output: default@table1 -PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 PREHOOK: type: QUERY -PREHOOK: Input: default@values__tmp__table__3 -PREHOOK: Output: default@table1 -POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 POSTHOOK: type: QUERY -POSTHOOK: Input: default@values__tmp__table__3 -POSTHOOK: Output: default@table1 -POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] -POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] -_col0 _col1 _col2 _col3 _col4 -PREHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 PREHOOK: type: QUERY PREHOOK: Input: default@table1 #### A masked pattern was here #### -POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: query: select d from table1 POSTHOOK: type: QUERY POSTHOOK: Input: default@table1 #### A masked pattern was 
here #### -a b c d e -1 new 10 ten NULL -1 original NULL NULL NULL -2 new 20 twenty NULL -2 original NULL NULL NULL -3 new 30 thirty NULL -3 original NULL NULL NULL -4 new 40 forty NULL -4 original NULL NULL NULL -5 new 100 hundred another1 -6 new 200 two hundred another2 +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred PREHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@table3 +PREHOOK: Output: default@table2 POSTHOOK: query: -- --- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT -- smallint = (2-byte signed integer, from -32,768 to 32,767) -- -CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__4 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__4 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -3 original -4 original -6737 original -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__5 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__5 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] _col0 _col1 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -6737 original -72909 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 add columns(e string) -PREHOOK: type: ALTERTABLE_ADDCOLS -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 add columns(e string) -POSTHOOK: type: ALTERTABLE_ADDCOLS -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__6 -PREHOOK: Output: default@table3 -POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__6 -POSTHOOK: Output: default@table3 -POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -_col0 _col1 _col2 -PREHOOK: query: select a,b from table3 -PREHOOK: type: QUERY -PREHOOK: Input: default@table3 -#### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 -#### A masked pattern was here #### -a b -1000 original -200 new -3 original -32768 new -4 original -40000 new -5000 new -6737 original -72909 new -90000 new -PREHOOK: query: -- ADD COLUMNS ... RESTRICT -alter table table3 change column a a int -PREHOOK: type: ALTERTABLE_RENAMECOL -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: -- ADD COLUMNS ... 
RESTRICT -alter table table3 change column a a int -POSTHOOK: type: ALTERTABLE_RENAMECOL -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 -PREHOOK: query: select a,b from table3 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 PREHOOK: type: QUERY -PREHOOK: Input: default@table3 +PREHOOK: Input: default@table2 #### A masked pattern was here #### -POSTHOOK: query: select a,b from table3 +POSTHOOK: query: select a,b from table2 POSTHOOK: type: QUERY -POSTHOOK: Input: default@table3 +POSTHOOK: Input: default@table2 #### A masked pattern was here #### a b 1000 original @@ -285,13 +290,9 @@ POSTHOOK: Input: default@table1 POSTHOOK: Output: default@table1 PREHOOK: query: DROP TABLE table2 PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 POSTHOOK: query: DROP TABLE table2 POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE table3 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@table3 -PREHOOK: Output: default@table3 -POSTHOOK: query: DROP TABLE table3 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@table3 -POSTHOOK: Output: default@table3 +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_part.q.out new file mode 100644 index 0000000..e17b1b0 --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_part.q.out @@ -0,0 +1,670 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +Explain +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized + File Output Operator [FS_4] + Select Operator [OP_3] (rows=10 width=13) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=10 width=13) + default@partitioned1,partitioned1,Tbl:COMPLETE,Col:NONE,Output:["a","b"] + +PREHOOK: query: select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a 
c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY 
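Editor's illustration: the partitioned3 statements in this hunk exercise the DYNAMIC INSERT variation. PARTITION(part) is left unbound and the trailing value of each VALUES row (2 or 1) chooses the target partition, which is why the lineage above lists both part=1 and part=2 outputs for a single INSERT. A condensed sketch of that pattern follows; the table name demo_part is hypothetical and the session settings are an assumption (the .q source is not part of this diff), not the literal test setup.

-- Sketch only: dynamic-partition INSERT ... VALUES, assuming nonstrict mode is needed
-- because no static partition spec is given.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

CREATE TABLE demo_part(a INT, b STRING) PARTITIONED BY (part INT) STORED AS TEXTFILE;

-- The last expression in each row binds the dynamic partition column 'part':
INSERT INTO TABLE demo_part PARTITION (part)
VALUES (1, 'new', 2),   -- lands in part=2
       (5, 'new', 1);   -- lands in part=1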
+PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table.q.out new file mode 100644 index 0000000..ffce206 --- /dev/null +++ 
ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table.q.out @@ -0,0 +1,326 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +POSTHOOK: type: QUERY +Explain +Plan optimized by CBO. 
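Editor's illustration: in the golden output around this point, the "Map 1 vectorized" marker in the CBO plan shows that the TEXTFILE scan itself runs vectorized, and the SELECTs show rows written before the ALTER TABLE returning NULL for the added columns c and d. A minimal sketch of the same schema-evolution pattern follows; demo_table is a hypothetical name and the two set statements are an assumption about the session setup (the property names come from the HiveConf entries added by this patch, but the .q file contents are not shown in this hunk).

-- Sketch only; assumed session setup, not the literal test file.
set hive.vectorized.execution.enabled=true;
set hive.vectorized.use.vector.serde.deserialize=true;

CREATE TABLE demo_table(a INT, b STRING) STORED AS TEXTFILE;
INSERT INTO TABLE demo_table VALUES (1, 'original');

ALTER TABLE demo_table ADD COLUMNS (c INT, d STRING);
INSERT INTO TABLE demo_table VALUES (2, 'new', 20, 'twenty');

-- Rows written before the ALTER have no physical c/d fields in the text file,
-- so the reader defaults them to NULL:
SELECT a, b, c, d FROM demo_table;
-- expected, ignoring order:
--   1  original  NULL  NULL
--   2  new       20    twenty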
+ +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized + File Output Operator [FS_4] + Select Operator [OP_3] (rows=10 width=13) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=10 width=13) + default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["a","b"] + +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: 
default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table_bug.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table_bug.q.out new file mode 100644 index 0000000..50113c8 --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_text_vec_mapwork_table_bug.q.out @@ -0,0 +1,96 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- Note the use of hive.vectorized.use.vector.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_part.q.out new file mode 100644 index 0000000..729cd51 --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_part.q.out @@ -0,0 +1,668 @@ +PREHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +Explain +Plan optimized by CBO. 
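Editor's illustration: the schema_evol_text_vecrow_* outputs differ from the schema_evol_text_vec_* ones only in which deserialization path drives the vectorized TEXTFILE read. Per the header comments in these files, the vec variants rely on hive.vectorized.use.vector.serde.deserialize and the vecrow variants on hive.vectorized.use.row.serde.deserialize; both properties are the ones introduced in the HiveConf change in this patch, and the EXPLAIN plans report "Map 1 vectorized" either way. The sketch below shows how a session would plausibly toggle between the two variants; the exact combinations are an assumption, since the .q sources are not included in this diff.

-- Assumed session toggles for the two TEXTFILE vectorization variants; property
-- names are from HiveConf, the on/off combinations are illustrative only.

-- "vec" variant: batches are presumably built directly by the vector deserializer.
set hive.vectorized.execution.enabled=true;
set hive.vectorized.use.vector.serde.deserialize=true;
set hive.vectorized.use.row.serde.deserialize=false;

-- "vecrow" variant: rows are presumably deserialized by the regular row SerDe
-- and then packed into vectorized batches.
set hive.vectorized.use.vector.serde.deserialize=false;
set hive.vectorized.use.row.serde.deserialize=true;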
+ +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized + File Output Operator [FS_4] + Select Operator [OP_3] (rows=10 width=13) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=10 width=13) + default@partitioned1,partitioned1,Tbl:COMPLETE,Col:NONE,Output:["a","b"] + +PREHOOK: query: select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: 
QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_table.q.out new file mode 100644 index 0000000..9860fc6 --- /dev/null +++ 
ql/src/test/results/clientpositive/tez/schema_evol_text_vecrow_mapwork_table.q.out @@ -0,0 +1,324 @@ +PREHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Note the use of hive.vectorized.use.row.serde.deserialize above which enables doing +-- vectorized reading of TEXTFILE format files using the row SERDE methods. +-- +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +PREHOOK: type: QUERY +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +explain +select a,b from table1 +POSTHOOK: type: QUERY +Explain +Plan optimized by CBO. 
+ +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized + File Output Operator [FS_4] + Select Operator [OP_3] (rows=10 width=13) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=10 width=13) + default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["a","b"] + +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: 
default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java index c6ff748..535a7ae 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java @@ -88,6 +88,18 @@ private InputByteBuffer inputByteBuffer = new InputByteBuffer(); + private boolean[] columnsToInclude; + + // Temporary objects to use when skipping for columnsToInclude. + private ReadDateResults dummyReadDateResults; + private ReadTimestampResults dummyReadTimestampResults; + private ReadStringResults dummyReadStringResults; + private ReadHiveCharResults dummyReadHiveCharResults; + private ReadHiveVarcharResults dummyReadHiveVarcharResults; + private ReadBinaryResults dummyReadBinaryResults; + private ReadIntervalYearMonthResults dummyReadIntervalYearMonthResults; + private ReadIntervalDayTimeResults dummyReadIntervalDayTimeResults; + /* * Use this constructor when only ascending sort order is used. */ @@ -109,6 +121,8 @@ public BinarySortableDeserializeRead(TypeInfo[] typeInfos, readBeyondConfiguredFieldsWarned = false; readBeyondBufferRangeWarned = false; bufferRangeHasExtraDataWarned = false; + + columnsToInclude = null; } // Not public since we must have column information. @@ -123,6 +137,16 @@ private BinarySortableDeserializeRead() { } /* + * If some fields are not going to be used by the query, use this routine to specify + * the columns to return. The readCheckNull method will automatically return NULL for the + * other columns. + */ + @Override + public void setColumnsToInclude(boolean[] columnsToInclude) { + this.columnsToInclude = columnsToInclude; + } + + /* * Set the range of bytes to be deserialized. */ @Override @@ -177,13 +201,99 @@ public boolean readCheckNull() throws IOException { // We have a field and are positioned to it. - if (((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { - return false; - } + // Do we want this field? + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory(); + if (columnsToInclude != null && !columnsToInclude[fieldIndex]) { - // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read - // it here. - return earlyReadHiveDecimal(); + // We must read through the value to ignore it...
+ switch (primitiveCategory) { + case BOOLEAN: + readBoolean(); + break; + case BYTE: + readByte(); + break; + case SHORT: + readShort(); + break; + case INT: + readInt(); + break; + case LONG: + readLong(); + break; + case DATE: + if (dummyReadDateResults == null) { + dummyReadDateResults = createReadDateResults(); + } + readDate(dummyReadDateResults); + break; + case TIMESTAMP: + if (dummyReadTimestampResults == null) { + dummyReadTimestampResults = createReadTimestampResults(); + } + readTimestamp(dummyReadTimestampResults); + break; + case FLOAT: + readFloat(); + break; + case DOUBLE: + readDouble(); + break; + case STRING: + if (dummyReadStringResults == null) { + dummyReadStringResults = createReadStringResults(); + } + readString(dummyReadStringResults); + break; + case CHAR: + if (dummyReadHiveCharResults == null) { + dummyReadHiveCharResults = createReadHiveCharResults(); + } + readHiveChar(dummyReadHiveCharResults); + break; + case VARCHAR: + if (dummyReadHiveVarcharResults == null) { + dummyReadHiveVarcharResults = createReadHiveVarcharResults(); + } + readHiveVarchar(dummyReadHiveVarcharResults); + break; + case BINARY: + if (dummyReadBinaryResults == null) { + dummyReadBinaryResults = createReadBinaryResults(); + } + readBinary(dummyReadBinaryResults); + break; + case INTERVAL_YEAR_MONTH: + if (dummyReadIntervalYearMonthResults == null) { + dummyReadIntervalYearMonthResults = createReadIntervalYearMonthResults(); + } + readIntervalYearMonth(dummyReadIntervalYearMonthResults); + break; + case INTERVAL_DAY_TIME: + if (dummyReadIntervalDayTimeResults == null) { + dummyReadIntervalDayTimeResults = createReadIntervalDayTimeResults(); + } + readIntervalDayTime(dummyReadIntervalDayTimeResults); + break; + case DECIMAL: + // UNDONE: broken + earlyReadHiveDecimal(); + break; + default: + throw new RuntimeException("Unexpected primitive type category " + primitiveCategory); + } + return true; + } else { + // UNDONE + if (primitiveCategory != PrimitiveCategory.DECIMAL) { + return false; + } + + // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read + // it here. + return earlyReadHiveDecimal(); + } } /* diff --git serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java index c2b0cfc..dbadf4e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java @@ -60,6 +60,13 @@ TypeInfo[] typeInfos(); /* + * If some fields are not going to be used by the query, use this routine to specify + * the columns to return. The readCheckNull method will automatically return NULL for the + * other columns. + */ + void setColumnsToInclude(boolean[] columnsToInclude); + + /* * Set the range of bytes to be deserialized.
*/ void set(byte[] bytes, int offset, int length); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java index f44a84b..29204b6 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java @@ -111,8 +111,11 @@ private boolean readBeyondBufferRangeWarned; private boolean bufferRangeHasExtraDataWarned; + private boolean[] columnsToInclude; + public LazySimpleDeserializeRead(TypeInfo[] typeInfos, byte separator, LazySerDeParameters lazyParams) { + this(); this.typeInfos = typeInfos; @@ -128,6 +131,12 @@ public LazySimpleDeserializeRead(TypeInfo[] typeInfos, readBeyondConfiguredFieldsWarned = false; readBeyondBufferRangeWarned = false; bufferRangeHasExtraDataWarned = false; + + columnsToInclude = null; + } + + public LazySimpleDeserializeRead(TypeInfo[] typeInfos, LazySerDeParameters lazyParams) { + this(typeInfos, lazyParams.getSeparators()[0], lazyParams); } // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. @@ -143,6 +152,16 @@ private LazySimpleDeserializeRead() { } /* + * If some fields are not going to be used by the query, use this routine to specify + * the columns to return. The readCheckNull method will automatically return NULL for the + * other columns. + */ + @Override + public void setColumnsToInclude(boolean[] columnsToInclude) { + this.columnsToInclude = columnsToInclude; + } + + /* * Set the range of bytes to be deserialized. */ @Override @@ -172,7 +191,8 @@ public boolean readCheckNull() { } return true; } - if (offset > end) { + // UNDONE: Having a problem here reading a text file with 2 columns when the schema allows 4... + if (offset >= end) { // We must allow for an empty field at the end, so no strict >= checking. + if (!readBeyondBufferRangeWarned) { // Warn only once. @@ -210,9 +230,14 @@ public boolean readCheckNull() { } } - char[] charField = new char[fieldLength]; - for (int c = 0; c < charField.length; c++) { - charField[c] = (char) (bytes[fieldStart + c] & 0xFF); + // char[] charField = new char[fieldLength]; + // for (int c = 0; c < charField.length; c++) { + // charField[c] = (char) (bytes[fieldStart + c] & 0xFF); + // } + + // Do we want this field? + if (columnsToInclude != null && !columnsToInclude[fieldIndex]) { + return true; } // Is the field the configured string representing NULL?
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java index c5f0730..9d32806 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java @@ -82,6 +82,8 @@ private boolean readBeyondBufferRangeWarned; private boolean bufferRangeHasExtraDataWarned; + private boolean[] columnsToInclude; + public LazyBinaryDeserializeRead(TypeInfo[] typeInfos) { this.typeInfos = typeInfos; fieldCount = typeInfos.length; @@ -90,6 +92,8 @@ public LazyBinaryDeserializeRead(TypeInfo[] typeInfos) { readBeyondConfiguredFieldsWarned = false; readBeyondBufferRangeWarned = false; bufferRangeHasExtraDataWarned = false; + + columnsToInclude = null; } // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. @@ -104,6 +108,16 @@ private LazyBinaryDeserializeRead() { } /* + * If some fields are not going to be used by the query, use this routine to specify + * the columns to return. The readCheckNull method will automatically return NULL for the + * other columns. + */ + @Override + public void setColumnsToInclude(boolean[] columnsToInclude) { + this.columnsToInclude = columnsToInclude; + } + + /* * Set the range of bytes to be deserialized. */ @Override @@ -155,13 +169,34 @@ public boolean readCheckNull() throws IOException { // We have a field and are positioned to it. - if (((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { - return false; - } + // Do we want this field? + if (columnsToInclude != null && !columnsToInclude[fieldIndex]) { + + // When NULL, we need to move past this field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } - // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read - // it here. - return earlyReadHiveDecimal(); + return true; + } else { + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory(); + if (primitiveCategory != PrimitiveCategory.DECIMAL) { + return false; + } + + // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read - // it here. + return earlyReadHiveDecimal(); + } } // When NULL, we need to move past this field.
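To illustrate the intended use of the new setColumnsToInclude() hook on the fast DeserializeRead interface, here is a minimal, hypothetical caller sketch; it is not part of the patch. It assumes an already-constructed DeserializeRead implementation (for example the LazySimpleDeserializeRead constructor added above for TEXTFILE row deserialization), assumes readInt() is exposed on the interface as it is in the implementations, and, purely for brevity, assumes every included column is an INT. The class and helper names below are illustrative only.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hive.serde2.fast.DeserializeRead;

public class ColumnsToIncludeSketch {

  // Build the per-column mask consumed by setColumnsToInclude():
  // true for columns the query actually projects, false for everything else.
  static boolean[] buildIncludeMask(int columnCount, List<Integer> neededColumnIndexes) {
    boolean[] columnsToInclude = new boolean[columnCount];  // all false by default
    for (int columnIndex : neededColumnIndexes) {
      columnsToInclude[columnIndex] = true;
    }
    return columnsToInclude;
  }

  // Read one serialized row. For excluded columns, readCheckNull() returns true and the
  // reader skips over the field's bytes internally, so the caller treats the column as
  // NULL without ever materializing its value.
  static void readRow(DeserializeRead deserializeRead, byte[] rowBytes, int offset, int length,
      boolean[] columnsToInclude) throws IOException {
    deserializeRead.setColumnsToInclude(columnsToInclude);
    deserializeRead.set(rowBytes, offset, length);

    int fieldCount = deserializeRead.typeInfos().length;
    for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
      if (deserializeRead.readCheckNull()) {
        // Field is NULL or excluded via columnsToInclude; downstream sees NULL.
        continue;
      }
      // Sketch assumption: every included column is an INT, so readInt() suffices.
      // A real caller (e.g. a vectorized row-deserialize reader) would dispatch on
      // typeInfos()[fieldIndex] to the appropriate type-specific read method.
      int value = deserializeRead.readInt();
      // ... hand "value" to the consuming operator or column vector ...
    }
  }
}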