Index: . =================================================================== --- . (revision 1673556) +++ . (working copy) Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /hive/trunk:r1669723-1673506 Index: beeline/src/java/org/apache/hive/beeline/Commands.java =================================================================== --- beeline/src/java/org/apache/hive/beeline/Commands.java (revision 1673556) +++ beeline/src/java/org/apache/hive/beeline/Commands.java (working copy) @@ -760,14 +760,22 @@ while (beeLine.getConsoleReader() != null && !(line.trim().endsWith(";")) && beeLine.getOpts().isAllowMultiLineCommand()) { - StringBuilder prompt = new StringBuilder(beeLine.getPrompt()); - for (int i = 0; i < prompt.length() - 1; i++) { - if (prompt.charAt(i) != '>') { - prompt.setCharAt(i, i % 2 == 0 ? '.' : ' '); + if (!beeLine.getOpts().isSilent()) { + StringBuilder prompt = new StringBuilder(beeLine.getPrompt()); + for (int i = 0; i < prompt.length() - 1; i++) { + if (prompt.charAt(i) != '>') { + prompt.setCharAt(i, i % 2 == 0 ? '.' : ' '); + } } } - String extra = beeLine.getConsoleReader().readLine(prompt.toString()); + String extra = null; + if (beeLine.getOpts().isSilent() && beeLine.getOpts().getScriptFile() != null) { + extra = beeLine.getConsoleReader().readLine(null, jline.console.ConsoleReader.NULL_MASK); + } else { + extra = beeLine.getConsoleReader().readLine(beeLine.getPrompt()); + } + if (extra == null) { //it happens when using -f and the line of cmds does not end with ; break; } Index: bin/beeline =================================================================== --- bin/beeline (revision 1673556) +++ bin/beeline (working copy) @@ -18,4 +18,8 @@ bin=`dirname "$0"` bin=`cd "$bin"; pwd` +# Set Hadoop User classpath to true so that httpclient jars are taken from +# hive lib instead of hadoop lib. +export HADOOP_USER_CLASSPATH_FIRST=true + . "$bin"/hive --service beeline "$@" Index: cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java =================================================================== --- cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java (revision 1673556) +++ cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java (working copy) @@ -37,9 +37,11 @@ import java.util.Set; import com.google.common.base.Splitter; + import jline.console.ConsoleReader; import jline.console.completer.Completer; import jline.console.history.FileHistory; +import jline.console.history.History; import jline.console.history.PersistentHistory; import jline.console.completer.StringsCompleter; import jline.console.completer.ArgumentCompleter; @@ -93,12 +95,16 @@ public static final String HIVERCFILE = ".hiverc"; private final LogHelper console; + protected ConsoleReader reader; private Configuration conf; public CliDriver() { SessionState ss = SessionState.get(); conf = (ss != null) ? 
ss.getConf() : new Configuration(); Log LOG = LogFactory.getLog("CliDriver"); + if (LOG.isDebugEnabled()) { + LOG.debug("CliDriver inited with classpath " + System.getProperty("java.class.path")); + } console = new LogHelper(LOG); } @@ -712,34 +718,10 @@ return 3; } - ConsoleReader reader = getConsoleReader(); - reader.setBellEnabled(false); - // reader.setDebug(new PrintWriter(new FileWriter("writer.debug", true))); - for (Completer completer : getCommandCompleter()) { - reader.addCompleter(completer); - } + setupConsoleReader(); String line; - final String HISTORYFILE = ".hivehistory"; - String historyDirectory = System.getProperty("user.home"); - PersistentHistory history = null; - try { - if ((new File(historyDirectory)).exists()) { - String historyFile = historyDirectory + File.separator + HISTORYFILE; - history = new FileHistory(new File(historyFile)); - reader.setHistory(history); - } else { - System.err.println("WARNING: Directory for Hive history file: " + historyDirectory + - " does not exist. History will not be available during this session."); - } - } catch (Exception e) { - System.err.println("WARNING: Encountered an error while trying to initialize Hive's " + - "history file. History will not be available during this session."); - System.err.println(e.getMessage()); - } - int ret = 0; - String prefix = ""; String curDB = getFormattedDb(conf, ss); String curPrompt = prompt + curDB; @@ -763,15 +745,56 @@ } } - if (history != null) { - history.flush(); - } return ret; } - protected ConsoleReader getConsoleReader() throws IOException{ - return new ConsoleReader(); + private void setupCmdHistory() { + final String HISTORYFILE = ".hivehistory"; + String historyDirectory = System.getProperty("user.home"); + PersistentHistory history = null; + try { + if ((new File(historyDirectory)).exists()) { + String historyFile = historyDirectory + File.separator + HISTORYFILE; + history = new FileHistory(new File(historyFile)); + reader.setHistory(history); + } else { + System.err.println("WARNING: Directory for Hive history file: " + historyDirectory + + " does not exist. History will not be available during this session."); + } + } catch (Exception e) { + System.err.println("WARNING: Encountered an error while trying to initialize Hive's " + + "history file. History will not be available during this session."); + System.err.println(e.getMessage()); + } + + System.out.println("WARNING: Hive CLI is deprecated and migration to Beeline is recommended."); + + // add shutdown hook to flush the history to history file + Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { + @Override + public void run() { + History h = reader.getHistory(); + if (h instanceof FileHistory) { + try { + ((FileHistory) h).flush(); + } catch (IOException e) { + System.err.println("WARNING: Failed to write command history file: " + e.getMessage()); + } + } + } + })); } + + protected void setupConsoleReader() throws IOException { + reader = new ConsoleReader(); + reader.setExpandEvents(false); + reader.setBellEnabled(false); + for (Completer completer : getCommandCompleter()) { + reader.addCompleter(completer); + } + setupCmdHistory(); + } + /** * Retrieve the current database name string to display, based on the * configuration value. 
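For context on the CliDriver.java hunk above: getConsoleReader() is replaced by an overridable setupConsoleReader(), and history handling moves into setupCmdHistory(), which now flushes the jline FileHistory from a JVM shutdown hook instead of inline after the read loop. The standalone sketch below (an illustration only, not part of the patch) shows that flush-on-exit pattern using the same jline2 calls the patch relies on; the class name and history file name here are hypothetical.

import java.io.File;
import java.io.IOException;

import jline.console.ConsoleReader;
import jline.console.history.FileHistory;
import jline.console.history.History;

public class HistoryFlushSketch {
  public static ConsoleReader newReader() throws IOException {
    final ConsoleReader reader = new ConsoleReader();
    reader.setExpandEvents(false);   // avoid '!' event expansion on history entries
    reader.setBellEnabled(false);
    // Hypothetical history file; the patch itself uses $HOME/.hivehistory.
    reader.setHistory(new FileHistory(new File(System.getProperty("user.home"), ".examplehistory")));
    // Persist history even if the process exits without returning from the read loop.
    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
      @Override
      public void run() {
        History h = reader.getHistory();
        if (h instanceof FileHistory) {
          try {
            ((FileHistory) h).flush();
          } catch (IOException e) {
            System.err.println("WARNING: failed to write history file: " + e.getMessage());
          }
        }
      }
    }));
    return reader;
  }
}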
Index: cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java =================================================================== --- cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java (revision 1673556) +++ cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java (working copy) @@ -68,11 +68,13 @@ // Some of these tests require intercepting System.exit() using the SecurityManager. // It is safer to register/unregister our SecurityManager during setup/teardown instead // of doing it within the individual test cases. + @Override public void setUp() { securityManager = System.getSecurityManager(); System.setSecurityManager(new NoExitSecurityManager(securityManager)); } + @Override public void tearDown() { System.setSecurityManager(securityManager); } @@ -322,7 +324,7 @@ private static void setEnvLinux(String key, String value) throws Exception { Class[] classes = Collections.class.getDeclaredClasses(); - Map env = (Map) System.getenv(); + Map env = System.getenv(); for (Class cl : classes) { if ("java.util.Collections$UnmodifiableMap".equals(cl.getName())) { Field field = cl.getDeclaredField("m"); @@ -362,9 +364,8 @@ private static class FakeCliDriver extends CliDriver { @Override - protected ConsoleReader getConsoleReader() throws IOException { - ConsoleReader reslt = new FakeConsoleReader(); - return reslt; + protected void setupConsoleReader() throws IOException { + reader = new FakeConsoleReader(); } } Index: common/pom.xml =================================================================== --- common/pom.xml (revision 1673556) +++ common/pom.xml (working copy) @@ -93,6 +93,11 @@ ${junit.version} test + + org.json + json + ${json.version} + Index: common/src/java/org/apache/hadoop/hive/common/FileUtils.java =================================================================== --- common/src/java/org/apache/hadoop/hive/common/FileUtils.java (revision 1673556) +++ common/src/java/org/apache/hadoop/hive/common/FileUtils.java (working copy) @@ -335,16 +335,16 @@ * @param fs * file system * @param path - * @return the argument path if it exists or a parent path exists. Returns - * NULL root is only parent that exists + * @return FileStatus for argument path if it exists or the first ancestor in the path that exists * @throws IOException */ - public static Path getPathOrParentThatExists(FileSystem fs, Path path) throws IOException { - if (!fs.exists(path)) { - Path parentPath = path.getParent(); - return getPathOrParentThatExists(fs, parentPath); + public static FileStatus getPathOrParentThatExists(FileSystem fs, Path path) throws IOException { + FileStatus stat = FileUtils.getFileStatusOrNull(fs, path); + if (stat != null) { + return stat; } - return path; + Path parentPath = path.getParent(); + return getPathOrParentThatExists(fs, parentPath); } /** @@ -743,4 +743,20 @@ } + /** + * Attempts to get file status. This method differs from the FileSystem API in that it returns + * null instead of throwing FileNotFoundException if the path does not exist. 
+ * + * @param fs file system to check + * @param path file system path to check + * @return FileStatus for path or null if path does not exist + * @throws IOException if there is an I/O error + */ + public static FileStatus getFileStatusOrNull(FileSystem fs, Path path) throws IOException { + try { + return fs.getFileStatus(path); + } catch (FileNotFoundException e) { + return null; + } + } } Index: common/src/java/org/apache/hadoop/hive/common/jsonexplain/JsonParser.java =================================================================== --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/JsonParser.java (revision 0) +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/JsonParser.java (working copy) @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.jsonexplain; + +import java.io.PrintStream; + +import org.json.JSONObject; + +/** + * JsonParser is the interface for classes that print a JSONObject + * into outputStream. + */ +public interface JsonParser { + public void print(JSONObject inputObject, PrintStream outputStream) throws Exception; +} Index: common/src/java/org/apache/hadoop/hive/common/jsonexplain/JsonParserFactory.java =================================================================== --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/JsonParserFactory.java (revision 0) +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/JsonParserFactory.java (working copy) @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.jsonexplain; + +import org.apache.hadoop.hive.common.jsonexplain.tez.TezJsonParser; +import org.apache.hadoop.hive.conf.HiveConf; + +public class JsonParserFactory { + + private JsonParserFactory() { + // avoid instantiation + } + + /** + * @param conf + * @return the appropriate JsonParser to print a JSONObject into outputStream. 
+ */ + public static JsonParser getParser(HiveConf conf) { + if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { + return new TezJsonParser(); + } + return null; + } +} Index: common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Attr.java =================================================================== --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Attr.java (revision 0) +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Attr.java (working copy) @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.jsonexplain.tez; + +public class Attr implements Comparable { + String name; + String value; + + public Attr(String name, String value) { + super(); + this.name = name; + this.value = value; + } + + @Override + public int compareTo(Attr o) { + return this.name.compareToIgnoreCase(o.name); + } + + public String toString() { + return this.name + this.value; + } +} Index: common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Connection.java =================================================================== --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Connection.java (revision 0) +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Connection.java (working copy) @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.common.jsonexplain.tez; + +public class Connection { + public String type; + public Vertex from; + + public Connection(String type, Vertex from) { + super(); + this.type = type; + this.from = from; + } +} Index: common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Op.java =================================================================== --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Op.java (revision 0) +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Op.java (working copy) @@ -0,0 +1,226 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.jsonexplain.tez; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.json.JSONException; +import org.json.JSONObject; + +public class Op { + String name; + String operatorId; + Op parent; + List children; + List attrs; + // the jsonObject for this operator + JSONObject opObject; + // the vertex that this operator belongs to + Vertex vertex; + // the vertex that this operator output to if this operator is a + // ReduceOutputOperator + String outputVertexName; + + public Op(String name, String id, String outputVertexName, List children, List attrs, + JSONObject opObject, Vertex vertex) throws JSONException { + super(); + this.name = name; + this.operatorId = id; + this.outputVertexName = outputVertexName; + this.children = children; + this.attrs = attrs; + this.opObject = opObject; + this.vertex = vertex; + } + + private void inlineJoinOp() throws Exception { + // inline map join operator + if (this.name.equals("Map Join Operator")) { + JSONObject mapjoinObj = opObject.getJSONObject("Map Join Operator"); + // get the map for posToVertex + JSONObject verticeObj = mapjoinObj.getJSONObject("input vertices:"); + Map posToVertex = new HashMap<>(); + for (String pos : JSONObject.getNames(verticeObj)) { + String vertexName = verticeObj.getString(pos); + posToVertex.put(pos, vertexName); + // update the connection + Connection c = null; + for (Connection connection : vertex.parentConnections) { + if (connection.from.name.equals(vertexName)) { + c = connection; + break; + } + } + if (c != null) { + TezJsonParser.addInline(this, c); + } + } + // update the attrs + removeAttr("input vertices:"); + // update the keys to use vertex name + JSONObject keys = mapjoinObj.getJSONObject("keys:"); + if (keys.length() != 0) { + JSONObject newKeys = new JSONObject(); + for (String key : JSONObject.getNames(keys)) { + String vertexName = posToVertex.get(key); + if (vertexName != null) { + newKeys.put(vertexName, keys.get(key)); + } else { + newKeys.put(this.vertex.name, 
keys.get(key)); + } + } + // update the attrs + removeAttr("keys:"); + this.attrs.add(new Attr("keys:", newKeys.toString())); + } + } + // inline merge join operator in a self-join + else if (this.name.equals("Merge Join Operator")) { + if (this.vertex != null) { + for (Vertex v : this.vertex.mergeJoinDummyVertexs) { + TezJsonParser.addInline(this, new Connection(null, v)); + } + } + } else { + throw new Exception("Unknown join operator"); + } + } + + private String getNameWithOpId() { + if (operatorId != null) { + return this.name + " [" + operatorId + "]"; + } else { + return this.name; + } + } + + /** + * @param out + * @param indentFlag + * @param branchOfJoinOp + * This parameter is used to show if it is a branch of a Join + * operator so that we can decide the corresponding indent. + * @throws Exception + */ + public void print(PrintStream out, List indentFlag, boolean branchOfJoinOp) + throws Exception { + // print name + if (TezJsonParser.printSet.contains(this)) { + out.println(TezJsonParser.prefixString(indentFlag) + " Please refer to the previous " + + this.getNameWithOpId()); + return; + } + TezJsonParser.printSet.add(this); + if (!branchOfJoinOp) { + out.println(TezJsonParser.prefixString(indentFlag) + this.getNameWithOpId()); + } else { + out.println(TezJsonParser.prefixString(indentFlag, "|<-") + this.getNameWithOpId()); + } + branchOfJoinOp = false; + // if this operator is a join operator + if (this.name.contains("Join")) { + inlineJoinOp(); + branchOfJoinOp = true; + } + // if this operator is the last operator, we summarize the non-inlined + // vertex + List noninlined = new ArrayList<>(); + if (this.parent == null) { + if (this.vertex != null) { + for (Connection connection : this.vertex.parentConnections) { + if (!TezJsonParser.isInline(connection.from)) { + noninlined.add(connection); + } + } + } + } + // print attr + List attFlag = new ArrayList<>(); + attFlag.addAll(indentFlag); + // should print | if (1) it is branchOfJoinOp or (2) it is the last op and + // has following non-inlined vertex + if (branchOfJoinOp || (this.parent == null && !noninlined.isEmpty())) { + attFlag.add(true); + } else { + attFlag.add(false); + } + Collections.sort(attrs); + for (Attr attr : attrs) { + out.println(TezJsonParser.prefixString(attFlag) + attr.toString()); + } + // print inline vertex + if (TezJsonParser.inlineMap.containsKey(this)) { + for (int index = 0; index < TezJsonParser.inlineMap.get(this).size(); index++) { + Connection connection = TezJsonParser.inlineMap.get(this).get(index); + List vertexFlag = new ArrayList<>(); + vertexFlag.addAll(indentFlag); + if (branchOfJoinOp) { + vertexFlag.add(true); + } + // if there is an inline vertex but the operator itself is not on a join + // branch, + // then it means it is from a vertex created by an operator tree, + // e.g., fetch operator, etc. 
+ else { + vertexFlag.add(false); + } + connection.from.print(out, vertexFlag, connection.type, this.vertex); + } + } + // print parent op, i.e., where data comes from + if (this.parent != null) { + List parentFlag = new ArrayList<>(); + parentFlag.addAll(indentFlag); + parentFlag.add(false); + this.parent.print(out, parentFlag, branchOfJoinOp); + } + // print next vertex + else { + for (int index = 0; index < noninlined.size(); index++) { + Vertex v = noninlined.get(index).from; + List vertexFlag = new ArrayList<>(); + vertexFlag.addAll(indentFlag); + if (index != noninlined.size() - 1) { + vertexFlag.add(true); + } else { + vertexFlag.add(false); + } + v.print(out, vertexFlag, noninlined.get(index).type, this.vertex); + } + } + } + + public void removeAttr(String name) { + int removeIndex = -1; + for (int index = 0; index < attrs.size(); index++) { + if (attrs.get(index).name.equals(name)) { + removeIndex = index; + break; + } + } + if (removeIndex != -1) { + attrs.remove(removeIndex); + } + } +} Index: common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Stage.java =================================================================== --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Stage.java (revision 0) +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Stage.java (working copy) @@ -0,0 +1,253 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.jsonexplain.tez; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.fs.Path; +import org.codehaus.jackson.JsonParseException; +import org.codehaus.jackson.map.JsonMappingException; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +public class Stage { + String name; + // upstream stages, e.g., root stage + List parentStages; + // downstream stages. + List childStages; + Map vertexs; + List attrs; + LinkedHashMap> tezStageDependency; + // some stage may contain only a single operator, e.g., create table operator, + // fetch operator. 
+ Op op; + + public Stage(String name) { + super(); + this.name = name; + parentStages = new ArrayList<>(); + childStages = new ArrayList<>(); + attrs = new ArrayList<>(); + vertexs = new LinkedHashMap<>(); + } + + public void addDependency(JSONObject object, Map stages) throws JSONException { + if (!object.has("ROOT STAGE")) { + String names = object.getString("DEPENDENT STAGES"); + for (String name : names.split(",")) { + Stage parent = stages.get(name.trim()); + this.parentStages.add(parent); + parent.childStages.add(this); + } + } + } + + /** + * @param object + * @throws Exception + * If the object of stage contains "Tez", we need to extract the + * vertices and edges Else we need to directly extract operators + * and/or attributes. + */ + public void extractVertex(JSONObject object) throws Exception { + if (object.has("Tez")) { + this.tezStageDependency = new LinkedHashMap<>(); + JSONObject tez = (JSONObject) object.get("Tez"); + JSONObject vertices = tez.getJSONObject("Vertices:"); + if (tez.has("Edges:")) { + JSONObject edges = tez.getJSONObject("Edges:"); + // iterate for the first time to get all the vertices + for (String to : JSONObject.getNames(edges)) { + vertexs.put(to, new Vertex(to, vertices.getJSONObject(to))); + } + // iterate for the second time to get all the vertex dependency + for (String to : JSONObject.getNames(edges)) { + Object o = edges.get(to); + Vertex v = vertexs.get(to); + // 1 to 1 mapping + if (o instanceof JSONObject) { + JSONObject obj = (JSONObject) o; + String parent = obj.getString("parent"); + Vertex parentVertex = vertexs.get(parent); + if (parentVertex == null) { + parentVertex = new Vertex(parent, vertices.getJSONObject(parent)); + vertexs.put(parent, parentVertex); + } + String type = obj.getString("type"); + // for union vertex, we reverse the dependency relationship + if (!"CONTAINS".equals(type)) { + v.addDependency(new Connection(type, parentVertex)); + parentVertex.children.add(v); + } else { + parentVertex.addDependency(new Connection(type, v)); + v.children.add(parentVertex); + } + this.tezStageDependency.put(v, Arrays.asList(new Connection(type, parentVertex))); + } else { + // 1 to many mapping + JSONArray from = (JSONArray) o; + List list = new ArrayList<>(); + for (int index = 0; index < from.length(); index++) { + JSONObject obj = from.getJSONObject(index); + String parent = obj.getString("parent"); + Vertex parentVertex = vertexs.get(parent); + if (parentVertex == null) { + parentVertex = new Vertex(parent, vertices.getJSONObject(parent)); + vertexs.put(parent, parentVertex); + } + String type = obj.getString("type"); + if (!"CONTAINS".equals(type)) { + v.addDependency(new Connection(type, parentVertex)); + parentVertex.children.add(v); + } else { + parentVertex.addDependency(new Connection(type, v)); + v.children.add(parentVertex); + } + list.add(new Connection(type, parentVertex)); + } + this.tezStageDependency.put(v, list); + } + } + } else { + for (String vertexName : JSONObject.getNames(vertices)) { + vertexs.put(vertexName, new Vertex(vertexName, vertices.getJSONObject(vertexName))); + } + } + // The opTree in vertex is extracted + for (Vertex v : vertexs.values()) { + if (!v.union) { + v.extractOpTree(); + v.checkMultiReduceOperator(); + } + } + } else { + String[] names = JSONObject.getNames(object); + for (String name : names) { + if (name.contains("Operator")) { + this.op = extractOp(name, object.getJSONObject(name)); + } else { + attrs.add(new Attr(name, object.get(name).toString())); + } + } + } + } + + /** + * @param 
opName + * @param opObj + * @return + * @throws JSONException + * @throws JsonParseException + * @throws JsonMappingException + * @throws IOException + * @throws Exception + * This method address the create table operator, fetch operator, + * etc + */ + Op extractOp(String opName, JSONObject opObj) throws JSONException, JsonParseException, + JsonMappingException, IOException, Exception { + List attrs = new ArrayList<>(); + Vertex v = null; + if (opObj.length() > 0) { + String[] names = JSONObject.getNames(opObj); + for (String name : names) { + Object o = opObj.get(name); + if (isPrintable(o)) { + attrs.add(new Attr(name, o.toString())); + } else if (o instanceof JSONObject) { + JSONObject attrObj = (JSONObject) o; + if (attrObj.length() > 0) { + if (name.equals("Processor Tree:")) { + JSONObject object = new JSONObject(); + object.put(name, attrObj); + v = new Vertex(null, object); + v.extractOpTree(); + } else { + for (String attrName : JSONObject.getNames(attrObj)) { + attrs.add(new Attr(attrName, attrObj.get(attrName).toString())); + } + } + } + } else { + throw new Exception("Unsupported object in " + this.name); + } + } + } + Op op = new Op(opName, null, null, null, attrs, null, v); + if (v != null) { + TezJsonParser.addInline(op, new Connection(null, v)); + } + return op; + } + + private boolean isPrintable(Object val) { + if (val instanceof Boolean || val instanceof String || val instanceof Integer + || val instanceof Long || val instanceof Byte || val instanceof Float + || val instanceof Double || val instanceof Path) { + return true; + } + if (val != null && val.getClass().isPrimitive()) { + return true; + } + return false; + } + + public void print(PrintStream out, List indentFlag) throws JSONException, Exception { + // print stagename + if (TezJsonParser.printSet.contains(this)) { + out.println(TezJsonParser.prefixString(indentFlag) + " Please refer to the previous " + + this.name); + return; + } + TezJsonParser.printSet.add(this); + out.println(TezJsonParser.prefixString(indentFlag) + this.name); + // print vertexes + List nextIndentFlag = new ArrayList<>(); + nextIndentFlag.addAll(indentFlag); + nextIndentFlag.add(false); + for (Vertex candidate : this.vertexs.values()) { + if (!TezJsonParser.isInline(candidate) && candidate.children.isEmpty()) { + candidate.print(out, nextIndentFlag, null, null); + } + } + if (!attrs.isEmpty()) { + Collections.sort(attrs); + for (Attr attr : attrs) { + out.println(TezJsonParser.prefixString(nextIndentFlag) + attr.toString()); + } + } + if (op != null) { + op.print(out, nextIndentFlag, false); + } + nextIndentFlag.add(false); + // print dependent stages + for (Stage stage : this.parentStages) { + stage.print(out, nextIndentFlag); + } + } +} Index: common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java =================================================================== --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java (revision 0) +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/TezJsonParser.java (working copy) @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.jsonexplain.tez; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.jsonexplain.JsonParser; +import org.codehaus.jackson.JsonParseException; +import org.codehaus.jackson.map.JsonMappingException; +import org.json.JSONException; +import org.json.JSONObject; + +public class TezJsonParser implements JsonParser { + JSONObject inputObject; + Map stages; + PrintStream outputStream; + protected final Log LOG; + // the object that has been printed. + public static Set printSet = new HashSet<>(); + // the vertex that should be inlined. + public static Map> inlineMap = new HashMap<>(); + public TezJsonParser() { + super(); + LOG = LogFactory.getLog(this.getClass().getName()); + } + public void extractStagesAndPlans() throws JSONException, JsonParseException, + JsonMappingException, Exception, IOException { + // extract stages + this.stages = new HashMap(); + JSONObject dependency = inputObject.getJSONObject("STAGE DEPENDENCIES"); + if (dependency.length() > 0) { + // iterate for the first time to get all the names of stages. + for (String stageName : JSONObject.getNames(dependency)) { + this.stages.put(stageName, new Stage(stageName)); + } + // iterate for the second time to get all the dependency. 
+ for (String stageName : JSONObject.getNames(dependency)) { + JSONObject dependentStageNames = dependency.getJSONObject(stageName); + this.stages.get(stageName).addDependency(dependentStageNames, this.stages); + } + } + // extract stage plans + JSONObject stagePlans = inputObject.getJSONObject("STAGE PLANS"); + if (stagePlans.length() > 0) { + for (String stageName : JSONObject.getNames(stagePlans)) { + JSONObject stagePlan = stagePlans.getJSONObject(stageName); + this.stages.get(stageName).extractVertex(stagePlan); + } + } + } + + /** + * @param indentFlag + * help to generate correct indent + * @return + */ + public static String prefixString(List indentFlag) { + StringBuilder sb = new StringBuilder(); + for (int index = 0; index < indentFlag.size(); index++) { + if (indentFlag.get(index)) + sb.append("| "); + else + sb.append(" "); + } + return sb.toString(); + } + + /** + * @param indentFlag + * @param tail + * help to generate correct indent with a specific tail + * @return + */ + public static String prefixString(List indentFlag, String tail) { + StringBuilder sb = new StringBuilder(); + for (int index = 0; index < indentFlag.size(); index++) { + if (indentFlag.get(index)) + sb.append("| "); + else + sb.append(" "); + } + int len = sb.length(); + return sb.replace(len - tail.length(), len, tail).toString(); + } + + @Override + public void print(JSONObject inputObject, PrintStream outputStream) throws Exception { + LOG.info("JsonParser is parsing\n" + inputObject.toString()); + this.inputObject = inputObject; + this.outputStream = outputStream; + this.extractStagesAndPlans(); + // print out the cbo info + if (inputObject.has("cboInfo")) { + outputStream.println(inputObject.getString("cboInfo")); + outputStream.println(); + } + // print out the vertex dependency in root stage + for (Stage candidate : this.stages.values()) { + if (candidate.tezStageDependency != null && candidate.tezStageDependency.size() > 0) { + outputStream.println("Vertex dependency in root stage"); + for (Entry> entry : candidate.tezStageDependency.entrySet()) { + StringBuffer sb = new StringBuffer(); + sb.append(entry.getKey().name); + sb.append(" <- "); + boolean printcomma = false; + for (Connection connection : entry.getValue()) { + if (printcomma) { + sb.append(", "); + } else { + printcomma = true; + } + sb.append(connection.from.name + " (" + connection.type + ")"); + } + outputStream.println(sb.toString()); + } + outputStream.println(); + } + } + List indentFlag = new ArrayList<>(); + // print out all the stages that have no childStages. 
+ for (Stage candidate : this.stages.values()) { + if (candidate.childStages.isEmpty()) { + candidate.print(outputStream, indentFlag); + } + } + } + + public static void addInline(Op op, Connection connection) { + List list = inlineMap.get(op); + if (list == null) { + list = new ArrayList<>(); + list.add(connection); + inlineMap.put(op, list); + } else { + list.add(connection); + } + } + public static boolean isInline(Vertex v) { + for(List list : inlineMap.values()){ + for (Connection connection : list) { + if(connection.from.equals(v)){ + return true; + } + } + } + return false; + } +} Index: common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Vertex.java =================================================================== --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Vertex.java (revision 0) +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Vertex.java (working copy) @@ -0,0 +1,248 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.jsonexplain.tez; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +import org.codehaus.jackson.JsonParseException; +import org.codehaus.jackson.map.JsonMappingException; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +public class Vertex { + public String name; + // vertex's parent connections. + public List parentConnections; + // vertex's children vertex. 
+ public List children; + // the jsonObject for this vertex + public JSONObject vertexObject; + // whether this vertex is a union vertex + public boolean union; + // whether this vertex is dummy (which does not really exists but is created), + // e.g., a dummy vertex for a mergejoin branch + public boolean dummy; + // the rootOps in this vertex + public List rootOps; + // we create a dummy vertex for a mergejoin branch for a self join if this + // vertex is a mergejoin + public List mergeJoinDummyVertexs; + // whether this vertex has multiple reduce operators + boolean hasMultiReduceOp; + + public Vertex(String name, JSONObject vertexObject) { + super(); + this.name = name; + if (this.name != null && this.name.contains("Union")) { + this.union = true; + } else { + this.union = false; + } + this.dummy = false; + this.vertexObject = vertexObject; + this.parentConnections = new ArrayList<>(); + this.children = new ArrayList<>(); + this.rootOps = new ArrayList<>(); + this.mergeJoinDummyVertexs = new ArrayList<>(); + this.hasMultiReduceOp = false; + } + + public void addDependency(Connection connection) throws JSONException { + this.parentConnections.add(connection); + } + + /** + * @throws JSONException + * @throws JsonParseException + * @throws JsonMappingException + * @throws IOException + * @throws Exception + * We assume that there is a single top-level Map Operator Tree or a + * Reduce Operator Tree in a vertex + */ + public void extractOpTree() throws JSONException, JsonParseException, JsonMappingException, + IOException, Exception { + if (vertexObject.length() != 0) { + for (String key : JSONObject.getNames(vertexObject)) { + if (key.equals("Map Operator Tree:")) { + extractOp(vertexObject.getJSONArray(key).getJSONObject(0)); + } else if (key.equals("Reduce Operator Tree:") || key.equals("Processor Tree:")) { + extractOp(vertexObject.getJSONObject(key)); + } + // this is the case when we have a map-side SMB join + // one input of the join is treated as a dummy vertex + else if (key.equals("Join:")) { + JSONArray array = vertexObject.getJSONArray(key); + for (int index = 0; index < array.length(); index++) { + JSONObject mpOpTree = array.getJSONObject(index); + Vertex v = new Vertex("", mpOpTree); + v.extractOpTree(); + v.dummy = true; + mergeJoinDummyVertexs.add(v); + } + } else { + throw new Exception("unsupported operator tree in vertex " + this.name); + } + } + } + } + + /** + * @param operator + * @param parent + * @return + * @throws JSONException + * @throws JsonParseException + * @throws JsonMappingException + * @throws IOException + * @throws Exception + * assumption: each operator only has one parent but may have many + * children + */ + Op extractOp(JSONObject operator) throws JSONException, JsonParseException, JsonMappingException, + IOException, Exception { + String[] names = JSONObject.getNames(operator); + if (names.length != 1) { + throw new Exception("Expect only one operator in " + operator.toString()); + } else { + String opName = names[0]; + JSONObject attrObj = (JSONObject) operator.get(opName); + List attrs = new ArrayList<>(); + List children = new ArrayList<>(); + String id = null; + String outputVertexName = null; + for (String attrName : JSONObject.getNames(attrObj)) { + if (attrName.equals("children")) { + Object childrenObj = attrObj.get(attrName); + if (childrenObj instanceof JSONObject) { + if (((JSONObject) childrenObj).length() != 0) { + children.add(extractOp((JSONObject) childrenObj)); + } + } else if (childrenObj instanceof JSONArray) { + if 
(((JSONArray) childrenObj).length() != 0) { + JSONArray array = ((JSONArray) childrenObj); + for (int index = 0; index < array.length(); index++) { + children.add(extractOp(array.getJSONObject(index))); + } + } + } else { + throw new Exception("Unsupported operator " + this.name + + "'s children operator is neither a jsonobject nor a jsonarray"); + } + } else { + if (attrName.equals("OperatorId:")) { + id = attrObj.get(attrName).toString(); + } else if (attrName.equals("outputname:")) { + outputVertexName = attrObj.get(attrName).toString(); + } else { + attrs.add(new Attr(attrName, attrObj.get(attrName).toString())); + } + } + } + Op op = new Op(opName, id, outputVertexName, children, attrs, operator, this); + if (!children.isEmpty()) { + for (Op child : children) { + child.parent = op; + } + } else { + this.rootOps.add(op); + } + return op; + } + } + + public void print(PrintStream out, List indentFlag, String type, Vertex callingVertex) + throws JSONException, Exception { + // print vertexname + if (TezJsonParser.printSet.contains(this) && !hasMultiReduceOp) { + if (type != null) { + out.println(TezJsonParser.prefixString(indentFlag, "|<-") + + " Please refer to the previous " + this.name + " [" + type + "]"); + } else { + out.println(TezJsonParser.prefixString(indentFlag, "|<-") + + " Please refer to the previous " + this.name); + } + return; + } + TezJsonParser.printSet.add(this); + if (type != null) { + out.println(TezJsonParser.prefixString(indentFlag, "|<-") + this.name + " [" + type + "]"); + } else if (this.name != null) { + out.println(TezJsonParser.prefixString(indentFlag) + this.name); + } + // print operators + if (hasMultiReduceOp) { + // find the right op + Op choose = null; + for (Op op : this.rootOps) { + if (op.outputVertexName.equals(callingVertex.name)) { + choose = op; + } + } + if (choose != null) { + choose.print(out, indentFlag, false); + } else { + throw new Exception("Can not find the right reduce output operator for vertex " + this.name); + } + } else { + for (Op op : this.rootOps) { + // dummy vertex is treated as a branch of a join operator + if (this.dummy) { + op.print(out, indentFlag, true); + } else { + op.print(out, indentFlag, false); + } + } + } + if (this.union) { + // print dependent vertexs + for (int index = 0; index < this.parentConnections.size(); index++) { + Connection connection = this.parentConnections.get(index); + List unionFlag = new ArrayList<>(); + unionFlag.addAll(indentFlag); + if (index != this.parentConnections.size() - 1) { + unionFlag.add(true); + } else { + unionFlag.add(false); + } + connection.from.print(out, unionFlag, connection.type, this); + } + } + } + + /** + * We check if a vertex has multiple reduce operators. 
+ */ + public void checkMultiReduceOperator() { + // check if it is a reduce vertex and its children is more than 1; + if (!this.name.contains("Reduce") || this.rootOps.size() < 2) { + return; + } + // check if all the child ops are reduce output operators + for (Op op : this.rootOps) { + if (!op.name.contains("Reduce")) + return; + } + this.hasMultiReduceOp = true; + } +} Index: common/src/java/org/apache/hadoop/hive/common/type/HiveChar.java =================================================================== --- common/src/java/org/apache/hadoop/hive/common/type/HiveChar.java (revision 1673556) +++ common/src/java/org/apache/hadoop/hive/common/type/HiveChar.java (working copy) @@ -55,7 +55,7 @@ return StringUtils.stripEnd(value, " "); } - protected String getPaddedValue() { + public String getPaddedValue() { return value; } Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1673556) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -167,6 +167,7 @@ HiveConf.ConfVars.HIVE_TXN_MANAGER, HiveConf.ConfVars.HIVE_TXN_TIMEOUT, HiveConf.ConfVars.HIVE_TXN_MAX_OPEN_BATCH, + HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION }; /** @@ -948,6 +949,13 @@ "Define the compression strategy to use while writing data. \n" + "This changes the compression level of higher level compression codec (like ZLIB)."), + HIVE_ORC_SPLIT_STRATEGY("hive.exec.orc.split.strategy", "HYBRID", new StringSet("HYBRID", "BI", "ETL"), + "This is not a user level config. BI strategy is used when the requirement is to spend less time in split generation" + + " as opposed to query execution (split generation does not read or cache file footers)." + + " ETL strategy is used when spending little more time in split generation is acceptable" + + " (split generation reads and caches file footers). HYBRID chooses between the above strategies" + + " based on heuristics."), + HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false, "If turned on splits generated by orc will include metadata about the stripes in the file. This\n" + "data is read remotely (from the client or HS2 machine) and sent to all the tasks."), @@ -1252,6 +1260,8 @@ HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0, "Standard error expressed in percentage. Provides a tradeoff between accuracy and compute cost. \n" + "A lower value for error indicates higher accuracy and a higher compute cost."), + HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION("hive.metastore.stats.ndv.densityfunction", false, + "Whether to use density function to estimate the NDV for the whole table based on the NDV of partitions"), HIVE_STATS_KEY_PREFIX_MAX_LENGTH("hive.stats.key.prefix.max.length", 150, "Determines if when the prefix of the key used for intermediate stats collection\n" + "exceeds a certain length, a hash of the key is used instead. 
If the value < 0 then hashing"), @@ -1596,8 +1606,14 @@ "${system:java.io.tmpdir}" + File.separator + "${system:user.name}" + File.separator + "operation_logs", "Top level directory where operation logs are stored if logging functionality is enabled"), - HIVE_SERVER2_LOGGING_OPERATION_VERBOSE("hive.server2.logging.operation.verbose", false, - "When true, HS2 operation logs available for clients will be verbose"), + HIVE_SERVER2_LOGGING_OPERATION_LEVEL("hive.server2.logging.operation.level", "EXECUTION", + new StringSet("NONE", "EXECUTION", "PERFORMANCE", "VERBOSE"), + "HS2 operation logging mode available to clients to be set at session level.\n" + + "For this to work, hive.server2.logging.operation.enabled should be set to true.\n" + + " NONE: Ignore any logging\n" + + " EXECUTION: Log completion of tasks\n" + + " PERFORMANCE: Execution + Performance logs \n" + + " VERBOSE: All logs" ), // logging configuration HIVE_LOG4J_FILE("hive.log4j.file", "", "Hive log4j configuration file.\n" + @@ -1613,6 +1629,9 @@ HIVE_LOG_EXPLAIN_OUTPUT("hive.log.explain.output", false, "Whether to log explain output for every query.\n" + "When enabled, will log EXPLAIN EXTENDED output for the query at INFO log4j log level."), + HIVE_EXPLAIN_USER("hive.explain.user", false, + "Whether to show explain result at user level.\n" + + "When enabled, will log EXPLAIN output for the query at user level."), // prefix used to auto generated column aliases (this should be started with '_') HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL("hive.autogen.columnalias.prefix.label", "_c", @@ -1688,10 +1707,6 @@ "Path component of URL endpoint when in HTTP mode."), HIVE_SERVER2_THRIFT_MAX_MESSAGE_SIZE("hive.server2.thrift.max.message.size", 100*1024*1024, "Maximum message size in bytes a HS2 server will accept."), - HIVE_SERVER2_THRIFT_HTTP_MIN_WORKER_THREADS("hive.server2.thrift.http.min.worker.threads", 5, - "Minimum number of worker threads when in HTTP mode."), - HIVE_SERVER2_THRIFT_HTTP_MAX_WORKER_THREADS("hive.server2.thrift.http.max.worker.threads", 500, - "Maximum number of worker threads when in HTTP mode."), HIVE_SERVER2_THRIFT_HTTP_MAX_IDLE_TIME("hive.server2.thrift.http.max.idle.time", "1800s", new TimeValidator(TimeUnit.MILLISECONDS), "Maximum idle time for a connection on the server when in HTTP mode."), @@ -1842,6 +1857,10 @@ "Operation will be closed when it's not accessed for this duration of time, which can be disabled by setting to zero value.\n" + " With positive value, it's checked for operations in terminal state only (FINISHED, CANCELED, CLOSED, ERROR).\n" + " With negative value, it's checked for all of the operations regardless of state."), + HIVE_SERVER2_IDLE_SESSION_CHECK_OPERATION("hive.server2.idle.session.check.operation", false, + "Session will be considered to be idle only if there is no activity, and there is no pending operation.\n" + + "This setting takes effect only if session idle timeout (hive.server2.idle.session.timeout) and checking\n" + + "(hive.server2.session.check.interval) are enabled."), HIVE_CONF_RESTRICTED_LIST("hive.conf.restricted.list", "hive.security.authenticator.manager,hive.security.authorization.manager,hive.users.in.admin.role", Index: common/src/java/org/apache/hive/common/util/DateUtils.java =================================================================== --- common/src/java/org/apache/hive/common/util/DateUtils.java (revision 1673556) +++ common/src/java/org/apache/hive/common/util/DateUtils.java (working copy) @@ -65,5 +65,12 @@ long totalNanos) { 
intervalDayTime.set(totalNanos / NANOS_PER_SEC, (int) (totalNanos % NANOS_PER_SEC)); } + + public static long getIntervalDayTimeTotalSecondsFromTotalNanos(long totalNanos) { + return totalNanos / NANOS_PER_SEC; + } + + public static int getIntervalDayTimeNanosFromTotalNanos(long totalNanos) { + return (int) (totalNanos % NANOS_PER_SEC); + } } - Index: conf/ivysettings.xml =================================================================== --- conf/ivysettings.xml (revision 0) +++ conf/ivysettings.xml (working copy) @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + Index: data/conf/hive-log4j.properties =================================================================== --- data/conf/hive-log4j.properties (revision 1673556) +++ data/conf/hive-log4j.properties (working copy) @@ -91,7 +91,7 @@ log4j.logger.org.apache.zookeeper.ClientCnxn=WARN,DRFA log4j.logger.org.apache.zookeeper.ClientCnxnSocket=WARN,DRFA log4j.logger.org.apache.zookeeper.ClientCnxnSocketNIO=WARN,DRFA -log4j.logger.org.apache.hadoop.hive.ql.log.PerfLogger=WARN,DRFA +log4j.logger.org.apache.hadoop.hive.ql.log.PerfLogger=${hive.ql.log.PerfLogger.level} log4j.logger.org.apache.hadoop.hive.ql.exec.Operator=INFO,DRFA log4j.logger.org.apache.hadoop.hive.serde2.lazy=INFO,DRFA log4j.logger.org.apache.hadoop.hive.metastore.ObjectStore=INFO,DRFA Index: data/conf/hive-site.xml =================================================================== --- data/conf/hive-site.xml (revision 1673556) +++ data/conf/hive-site.xml (working copy) @@ -240,6 +240,11 @@ Using dummy param to test server specific configuration + + hive.ql.log.PerfLogger.level + WARN,DRFA + Used to change the perflogger level + hive.fetch.task.conversion Index: data/files/HiveGroup.parquet =================================================================== Cannot display: file marked as a binary type. 
svn:mime-type = application/octet-stream Index: data/files/HiveGroup.parquet =================================================================== --- data/files/HiveGroup.parquet (revision 1673556) +++ data/files/HiveGroup.parquet (working copy) Property changes on: data/files/HiveGroup.parquet ___________________________________________________________________ Added: svn:mime-type ## -0,0 +1 ## +application/octet-stream \ No newline at end of property Index: data/files/extrapolate_stats_partial_ndv.txt =================================================================== --- data/files/extrapolate_stats_partial_ndv.txt (revision 0) +++ data/files/extrapolate_stats_partial_ndv.txt (working copy) @@ -0,0 +1,20 @@ +|1|1.0E3|94087|2000 +O|2|1.01E3|94086|2000 +|1|0.01E3|94087|2001 +H|2|2.0E3|94086|2001 +|3|1.0E3|94086|2001 +OH|4|1.01E3|43201|2001 +oh1|1|1.0E2|94087|2002 +OH2|2|9.0E2|43201|2002 +oh3|3|1.0E2|94087|2002 +OH4|4|9.1E2|94086|2002 +oh5|4|9.0E2|43201|2002 +OH6|5|0.01E3|94087|2002 +|31|1.0E3|94087|2003 +OH33|1|1.01E3|43201|2003 +|3|2.0E3|94087|2003 +OH|1|1.0E3|94086|2003 +|4|2.0E3|43201|2003 +OH|1|1.0E3|94087|2003 +|1|2.0E3|43201|2003 +OH|5|1.0E3|94086|2003 Index: data/files/parquet_types.txt =================================================================== --- data/files/parquet_types.txt (revision 1673556) +++ data/files/parquet_types.txt (working copy) @@ -1,22 +1,22 @@ -100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a |B4F3CAFDBEDD|k1:v1|101,200|10,abc|2011-01-01 -101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab |68692CCAC0BDE7|k2:v2|102,200|10,def|2012-02-02 -102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc|B4F3CAFDBEDD|k3:v3|103,200|10,ghi|2013-03-03 -103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd|68692CCAC0BDE7|k4:v4|104,200|10,jkl|2014-04-04 -104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde|B4F3CAFDBEDD|k5:v5|105,200|10,mno|2015-05-05 -105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef|68692CCAC0BDE7|k6:v6|106,200|10,pqr|2016-06-06 -106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg|B4F3CAFDBEDD|k7:v7|107,200|10,stu|2017-07-07 -107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh|68692CCAC0BDE7|k8:v8|108,200|10,vwx|2018-08-08 -108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|B4F3CAFDBEDD|68656C6C6F|k9:v9|109,200|10,yza|2019-09-09 -109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef|68692CCAC0BDE7|k10:v10|110,200|10,bcd|2020-10-10 -110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede|B4F3CAFDBEDD|k11:v11|111,200|10,efg|2021-11-11 -111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded|68692CCAC0BDE7|k12:v12|112,200|10,hij|2022-12-12 -112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd|B4F3CAFDBEDD|k13:v13|113,200|10,klm|2023-01-02 -113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc|68692CCAC0BDE7|k14:v14|114,200|10,nop|2024-02-02 -114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b|B4F3CAFDBEDD|k15:v15|115,200|10,qrs|2025-03-03 -115|1|1|1.0|4.5|qrs|2026-04-04 16:16:16.161616161|rstuv|abcded|68692CCAC0BDE7|k16:v16|116,200|10,qrs|2026-04-04 -116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded|B4F3CAFDBEDD|k17:v17|117,200|10,wxy|2027-05-05 -117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded|68692CCAC0BDE7|k18:v18|118,200|10,zab|2028-06-06 -118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede|B4F3CAFDBEDD|k19:v19|119,200|10,cde|2029-07-07 -119|2|5|1.4|5.7|fgh|2030-08-08 
20:20:20.202020202|vwxyz|abcdede|68692CCAC0BDE7|k20:v20|120,200|10,fgh|2030-08-08 -120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde|B4F3CAFDBEDD|k21:v21|121,200|10,ijk|2031-09-09 -121|1|2|1.1|6.3|lmn|2032-10-10 22:22:22.222222222|bcdef|abcde||k22:v22|122,200|10,lmn|2032-10-10 \ No newline at end of file +100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a |B4F3CAFDBEDD|k1:v1|101,200|10,abc|2011-01-01|48.88 +101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab |68692CCAC0BDE7|k2:v2|102,200|10,def|2012-02-02|8.72 +102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc|B4F3CAFDBEDD|k3:v3|103,200|10,ghi|2013-03-03|90.21 +103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd|68692CCAC0BDE7|k4:v4|104,200|10,jkl|2014-04-04|3.89 +104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde|B4F3CAFDBEDD|k5:v5|105,200|10,mno|2015-05-05|56.23 +105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef|68692CCAC0BDE7|k6:v6|106,200|10,pqr|2016-06-06|90.21 +106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg|B4F3CAFDBEDD|k7:v7|107,200|10,stu|2017-07-07|6.09 +107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh|68692CCAC0BDE7|k8:v8|108,200|10,vwx|2018-08-08|9.44 +108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|B4F3CAFDBEDD|68656C6C6F|k9:v9|109,200|10,yza|2019-09-09|77.54 +109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef|68692CCAC0BDE7|k10:v10|110,200|10,bcd|2020-10-10|25.42 +110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede|B4F3CAFDBEDD|k11:v11|111,200|10,efg|2021-11-11|60.12 +111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded|68692CCAC0BDE7|k12:v12|112,200|10,hij|2022-12-12|49.56 +112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd|B4F3CAFDBEDD|k13:v13|113,200|10,klm|2023-01-02|80.76 +113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc|68692CCAC0BDE7|k14:v14|114,200|10,nop|2024-02-02|23.23 +114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b|B4F3CAFDBEDD|k15:v15|115,200|10,qrs|2025-03-03|1.01 +115|1|1|1.0|4.5|qrs|2026-04-04 16:16:16.161616161|rstuv|abcded|68692CCAC0BDE7|k16:v16|116,200|10,qrs|2026-04-04|5.98 +116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded|B4F3CAFDBEDD|k17:v17|117,200|10,wxy|2027-05-05|11.22 +117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded|68692CCAC0BDE7|k18:v18|118,200|10,zab|2028-06-06|9.88 +118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede|B4F3CAFDBEDD|k19:v19|119,200|10,cde|2029-07-07|4.76 +119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede|68692CCAC0BDE7|k20:v20|120,200|10,fgh|2030-08-08|12.83 +120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde|B4F3CAFDBEDD|k21:v21|121,200|10,ijk|2031-09-09|73.04 +121|1|2|1.1|6.3|lmn|2032-10-10 22:22:22.222222222|bcdef|abcde||k22:v22|122,200|10,lmn|2032-10-10|90.33 \ No newline at end of file Index: hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java =================================================================== --- hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java (revision 1673556) +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java (working copy) @@ -510,8 +510,9 @@ } final Path finalOutputPath = getFinalPath(fs, file, srcDir, destDir, immutable); + FileStatus fileStatus = fs.getFileStatus(file); - if (fs.isFile(file)) { + if (fileStatus.isFile()) { if (dryRun){ if (immutable){ // Dryrun checks are 
meaningless for mutable table - we should always succeed @@ -541,7 +542,7 @@ } } } - } else if(fs.getFileStatus(file).isDir()) { + } else if (fileStatus.isDirectory()) { FileStatus[] children = fs.listStatus(file); FileStatus firstChild = null; Index: hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximInputFormat.java.broken =================================================================== --- hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximInputFormat.java.broken (revision 1673556) +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximInputFormat.java.broken (working copy) @@ -1,141 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.ql.parse.EximUtil; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; - -/** The InputFormat to use to read data from HCat */ -public class HCatEximInputFormat extends HCatBaseInputFormat { - - /** - * Set the input to use for the Job. This queries the metadata file with - * the specified partition predicates, gets the matching partitions, puts - * the information in the conf object. 
The inputInfo object is updated with - * information needed in the client context - * - * @param job the job object - * @return two hcat schemas, for the table columns and the partition keys - * @throws IOException - * the exception in communicating with the metadata server - */ - public static List setInput(Job job, - String location, - Map partitionFilter) throws IOException { - FileSystem fs; - try { - fs = FileSystem.get(new URI(location), job.getConfiguration()); - } catch (URISyntaxException e) { - throw new IOException(e); - } - Path fromPath = new Path(location); - Path metadataPath = new Path(fromPath, "_metadata"); - try { - Map.Entry> tp = EximUtil - .readMetaData(fs, metadataPath); - org.apache.hadoop.hive.metastore.api.Table table = tp.getKey(); - InputJobInfo inputInfo = InputJobInfo.create(table.getDbName(), table.getTableName(),null,null,null); - List partCols = table.getPartitionKeys(); - List partInfoList = null; - if (partCols.size() > 0) { - List partColNames = new ArrayList(partCols.size()); - for (FieldSchema fsc : partCols) { - partColNames.add(fsc.getName()); - } - List partitions = tp.getValue(); - partInfoList = filterPartitions(partitionFilter, partitions, table.getPartitionKeys()); - } else { - partInfoList = new ArrayList(1); - HCatSchema schema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getSd().getCols())); - Map parameters = table.getParameters(); - String inputStorageDriverClass = null; - if (parameters.containsKey(HCatConstants.HCAT_ISD_CLASS)){ - inputStorageDriverClass = parameters.get(HCatConstants.HCAT_ISD_CLASS); - }else{ - throw new IOException("No input storage driver classname found, cannot read partition"); - } - Properties hcatProperties = new Properties(); - for (String key : parameters.keySet()){ - if (key.startsWith(InitializeInput.HCAT_KEY_PREFIX)){ - hcatProperties.put(key, parameters.get(key)); - } - } - PartInfo partInfo = new PartInfo(schema, inputStorageDriverClass, location + "/data", hcatProperties); - partInfoList.add(partInfo); - } - inputInfo.setPartitions(partInfoList); - inputInfo.setTableInfo(HCatTableInfo.valueOf(table)); - job.getConfiguration().set( - HCatConstants.HCAT_KEY_JOB_INFO, - HCatUtil.serialize(inputInfo)); - List rv = new ArrayList(2); - rv.add(HCatSchemaUtils.getHCatSchema(table.getSd().getCols())); - rv.add(HCatSchemaUtils.getHCatSchema(partCols)); - return rv; - } catch(SemanticException e) { - throw new IOException(e); - } - } - - private static List filterPartitions(Map partitionFilter, - List partitions, List partCols) throws IOException { - List partInfos = new LinkedList(); - for (Partition partition : partitions) { - boolean matches = true; - List partVals = partition.getValues(); - assert partCols.size() == partVals.size(); - Map partSpec = EximUtil.makePartSpec(partCols, partVals); - if (partitionFilter != null) { - for (Map.Entry constraint : partitionFilter.entrySet()) { - String partVal = partSpec.get(constraint.getKey()); - if ((partVal == null) || !partVal.equals(constraint.getValue())) { - matches = false; - break; - } - } - } - if (matches) { - PartInfo partInfo = InitializeInput.extractPartInfo(partition.getSd(), - partition.getParameters()); - partInfo.setPartitionValues(partSpec); - partInfos.add(partInfo); - } - } - return partInfos; - } -} Index: hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken =================================================================== --- 
hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken (revision 1673556) +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken (working copy) @@ -1,166 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.EximUtil; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobStatus; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatException; - -public class HCatEximOutputCommitter extends OutputCommitter { - - private static final Log LOG = LogFactory.getLog(HCatEximOutputCommitter.class); - - private final OutputCommitter baseCommitter; - - public HCatEximOutputCommitter(JobContext context, OutputCommitter baseCommitter) { - this.baseCommitter = baseCommitter; - } - - @Override - public void abortTask(TaskAttemptContext context) throws IOException { - baseCommitter.abortTask(context); - } - - @Override - public void commitTask(TaskAttemptContext context) throws IOException { - baseCommitter.commitTask(context); - } - - @Override - public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { - return baseCommitter.needsTaskCommit(context); - } - - @Override - public void setupJob(JobContext context) throws IOException { - if( baseCommitter != null ) { - baseCommitter.setupJob(context); - } - } - - @Override - public void setupTask(TaskAttemptContext context) throws IOException { - baseCommitter.setupTask(context); - } - - @Override - public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException { - if(baseCommitter != null) { - baseCommitter.abortJob(jobContext, state); - } - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); - - Path src = new 
Path(jobInfo.getLocation()); - FileSystem fs = src.getFileSystem(jobContext.getConfiguration()); - fs.delete(src, true); - } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - if(baseCommitter != null) { - baseCommitter.commitJob(jobContext); - } - } - - @Override - public void cleanupJob(JobContext jobContext) throws IOException { - LOG.info("HCatEximOutputCommitter.cleanup invoked; m.o.d : " + - jobContext.getConfiguration().get("mapred.output.dir")); - if (baseCommitter != null) { - LOG.info("baseCommitter.class = " + baseCommitter.getClass().getName()); - baseCommitter.cleanupJob(jobContext); - } - - OutputJobInfo jobInfo = HCatBaseOutputFormat.getJobInfo(jobContext); - Configuration conf = jobContext.getConfiguration(); - FileSystem fs; - try { - fs = FileSystem.get(new URI(jobInfo.getTableInfo().getTable().getSd().getLocation()), conf); - } catch (URISyntaxException e) { - throw new IOException(e); - } - doCleanup(jobInfo, fs); - } - - private static void doCleanup(OutputJobInfo jobInfo, FileSystem fs) throws IOException, - HCatException { - try { - Table ttable = jobInfo.getTableInfo().getTable(); - org.apache.hadoop.hive.ql.metadata.Table table = new org.apache.hadoop.hive.ql.metadata.Table( - ttable); - StorageDescriptor tblSD = ttable.getSd(); - Path tblPath = new Path(tblSD.getLocation()); - Path path = new Path(tblPath, "_metadata"); - List tpartitions = null; - try { - Map.Entry> rv = EximUtil - .readMetaData(fs, path); - tpartitions = rv.getValue(); - } catch (IOException e) { - } - List partitions = - new ArrayList(); - if (tpartitions != null) { - for (Partition tpartition : tpartitions) { - partitions.add(new org.apache.hadoop.hive.ql.metadata.Partition(table, tpartition)); - } - } - if (!table.getPartitionKeys().isEmpty()) { - Map partitionValues = jobInfo.getPartitionValues(); - org.apache.hadoop.hive.ql.metadata.Partition partition = - new org.apache.hadoop.hive.ql.metadata.Partition(table, - partitionValues, - new Path(tblPath, Warehouse.makePartPath(partitionValues))); - partition.getTPartition().setParameters(table.getParameters()); - partitions.add(partition); - } - EximUtil.createExportDump(fs, path, (table), partitions); - } catch (SemanticException e) { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } catch (HiveException e) { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } catch (MetaException e) { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } - } -} Index: hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputFormat.java.broken =================================================================== --- hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputFormat.java.broken (revision 1673556) +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputFormat.java.broken (working copy) @@ -1,176 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.TreeMap; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.apache.hcatalog.rcfile.RCFileInputDriver; -import org.apache.hcatalog.rcfile.RCFileOutputDriver; - -/** - * The OutputFormat to use to write data to HCat without a hcat server. This can then - * be imported into a hcat instance, or used with a HCatEximInputFormat. As in - * HCatOutputFormat, the key value is ignored and - * and should be given as null. The value is the HCatRecord to write. - */ -public class HCatEximOutputFormat extends HCatBaseOutputFormat { - - private static final Log LOG = LogFactory.getLog(HCatEximOutputFormat.class); - - /** - * Get the record writer for the job. Uses the Table's default OutputStorageDriver - * to get the record writer. - * - * @param context - * the information about the current task. - * @return a RecordWriter to write the output for the job. - * @throws IOException - */ - @Override - public RecordWriter, HCatRecord> - getRecordWriter(TaskAttemptContext context - ) throws IOException, InterruptedException { - return getOutputFormat(context).getRecordWriter(context); - } - - /** - * Get the output committer for this output format. This is responsible - * for ensuring the output is committed correctly. 
- * @param context the task context - * @return an output committer - * @throws IOException - * @throws InterruptedException - */ - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { - return new HCatEximOutputCommitter(context,((OutputCommitterContainer)getOutputFormat(context).getOutputCommitter(context)).getBaseOutputCommitter()); - } - - /** - * Check for validity of the output-specification for the job. - * @param context information about the job - * @throws IOException when output should not be attempted - */ - @Override - public void checkOutputSpecs(JobContext context - ) throws IOException, InterruptedException { - ((OutputFormatContainer)getOutputFormat(context)).getBaseOutputFormat().checkOutputSpecs(context); - } - - public static void setOutput(Job job, String dbname, String tablename, String location, - HCatSchema partitionSchema, List partitionValues, HCatSchema columnSchema) throws HCatException { - setOutput(job, dbname, tablename, location, partitionSchema, partitionValues, columnSchema, - RCFileInputDriver.class.getName(), - RCFileOutputDriver.class.getName(), - RCFileInputFormat.class.getName(), - RCFileOutputFormat.class.getName(), - ColumnarSerDe.class.getName()); - } - - @SuppressWarnings("unchecked") - public static void setOutput(Job job, String dbname, String tablename, String location, - HCatSchema partitionSchema, - List partitionValues, - HCatSchema columnSchema, - String isdname, String osdname, - String ifname, String ofname, - String serializationLib) throws HCatException { - Map partSpec = new TreeMap(); - List partKeys = null; - if (partitionSchema != null) { - partKeys = partitionSchema.getFields(); - if (partKeys.size() != partitionValues.size()) { - throw new IllegalArgumentException("Partition key size differs from partition value size"); - } - for (int i = 0; i < partKeys.size(); ++i) { - HCatFieldSchema partKey = partKeys.get(i); - if (partKey.getType() != HCatFieldSchema.Type.STRING) { - throw new IllegalArgumentException("Partition key type string is only supported"); - } - partSpec.put(partKey.getName(), partitionValues.get(i)); - } - } - StorerInfo storerInfo = new StorerInfo(isdname, osdname, new Properties()); - OutputJobInfo outputJobInfo = OutputJobInfo.create(dbname,tablename,partSpec,null,null); - org.apache.hadoop.hive.ql.metadata.Table tbl = new - org.apache.hadoop.hive.ql.metadata.Table(dbname, tablename); - Table table = tbl.getTTable(); - table.getParameters().put(HCatConstants.HCAT_ISD_CLASS, isdname); - table.getParameters().put(HCatConstants.HCAT_OSD_CLASS, osdname); - try { - String partname = null; - if ((partKeys != null) && !partKeys.isEmpty()) { - List partSchema = HCatSchemaUtils.getFieldSchemas(partKeys); - table.setPartitionKeys(partSchema); - partname = Warehouse.makePartName(partSchema, partitionValues); - } else { - partname = "data"; - } - StorageDescriptor sd = table.getSd(); - sd.setLocation(location); - String dataLocation = location + "/" + partname; - outputJobInfo.setTableInfo(new HCatTableInfo(dbname,tablename,columnSchema,null,storerInfo,table)); - outputJobInfo.setOutputSchema(columnSchema); - outputJobInfo.setLocation(dataLocation); - setPartDetails(outputJobInfo, columnSchema, partSpec); - sd.setCols(HCatUtil.getFieldSchemaList(outputJobInfo.getOutputSchema().getFields())); - sd.setInputFormat(ifname); - sd.setOutputFormat(ofname); - SerDeInfo serdeInfo = sd.getSerdeInfo(); - serdeInfo.setSerializationLib(serializationLib); - 
Configuration conf = job.getConfiguration(); - conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); - } catch (MetaException e) { - throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); - } - } -} Index: hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestEximSemanticAnalysis.java.broken =================================================================== --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestEximSemanticAnalysis.java.broken (revision 1673556) +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestEximSemanticAnalysis.java.broken (working copy) @@ -1,175 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.cli; - -import java.io.IOException; -import java.net.URI; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.MiniCluster; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -public class TestEximSemanticAnalysis extends TestCase { - - private final MiniCluster cluster = MiniCluster.buildCluster(); - private HiveConf hcatConf; - private HCatDriver hcatDriver; - private Warehouse wh; - private static final Logger LOG = LoggerFactory.getLogger(TestEximSemanticAnalysis.class); - - @Override - protected void setUp() throws Exception { - - hcatConf = new HiveConf(this.getClass()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatConf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem"); - URI fsuri = cluster.getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), "/user/hive/warehouse"); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - 
wh = new Warehouse(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - - hcatDriver = new HCatDriver(); - } - - @Override - protected void tearDown() throws Exception { - } - - public void testExportPerms() throws IOException, MetaException, HiveException { - - hcatDriver.run("drop table junit_sem_analysis"); - CommandProcessorResponse response = hcatDriver - .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - assertEquals(0, response.getResponseCode()); - Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis"); - cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwx-wx")); - cluster.getFileSystem().setOwner(whPath, "nosuchuser", "nosuchgroup"); - - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = hcatDriver - .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - - assertEquals(10, response.getResponseCode()); - assertTrue("Permission denied expected : "+response.getErrorMessage(), - response.getErrorMessage().startsWith( - "FAILED: Error in semantic analysis: org.apache.hcatalog.common.HCatException : 3000 : Permission denied")); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = hcatDriver.run("drop table junit_sem_analysis"); - if (response.getResponseCode() != 0) { - LOG.error(response.getErrorMessage()); - fail("Drop table failed"); - } - } - - public void testImportPerms() throws IOException, MetaException, HiveException { - - hcatDriver.run("drop table junit_sem_analysis"); - CommandProcessorResponse response = hcatDriver - .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - assertEquals(0, response.getResponseCode()); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = hcatDriver - .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - assertEquals(0, response.getResponseCode()); - response = hcatDriver.run("drop table junit_sem_analysis"); - assertEquals(0, response.getResponseCode()); - response = hcatDriver - .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - assertEquals(0, response.getResponseCode()); - Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis"); - cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwxr-x")); - cluster.getFileSystem().setOwner(whPath, "nosuchuser", "nosuchgroup"); - - response = hcatDriver - .run("import table junit_sem_analysis from 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - - assertEquals(10, response.getResponseCode()); - assertTrue( - "Permission denied expected: "+response.getErrorMessage() , - response.getErrorMessage().startsWith( - "FAILED: Error in semantic analysis: org.apache.hcatalog.common.HCatException : 3000 : Permission denied")); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - - cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwxrwx")); - response = hcatDriver.run("drop table junit_sem_analysis"); - if (response.getResponseCode() != 0) { - LOG.error(response.getErrorMessage()); - fail("Drop table failed"); - } - } - - public void testImportSetPermsGroup() throws IOException, MetaException, HiveException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("drop table junit_sem_analysis_imported"); - CommandProcessorResponse response = hcatDriver - .run("create table 
junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - assertEquals(0, response.getResponseCode()); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = hcatDriver - .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - assertEquals(0, response.getResponseCode()); - response = hcatDriver.run("drop table junit_sem_analysis"); - assertEquals(0, response.getResponseCode()); - - hcatConf.set(HCatConstants.HCAT_PERMS, "-rwxrw-r--"); - hcatConf.set(HCatConstants.HCAT_GROUP, "nosuchgroup"); - - response = hcatDriver - .run("import table junit_sem_analysis_imported from 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - assertEquals(0, response.getResponseCode()); - - Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis_imported"); - assertEquals(FsPermission.valueOf("-rwxrw-r--"), cluster.getFileSystem().getFileStatus(whPath).getPermission()); - assertEquals("nosuchgroup", cluster.getFileSystem().getFileStatus(whPath).getGroup()); - - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - - response = hcatDriver.run("drop table junit_sem_analysis_imported"); - if (response.getResponseCode() != 0) { - LOG.error(response.getErrorMessage()); - fail("Drop table failed"); - } - } - - -} - Index: hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestStorageHandlerProperties.java.broken =================================================================== --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestStorageHandlerProperties.java.broken (revision 1673556) +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestStorageHandlerProperties.java.broken (working copy) @@ -1,86 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.cli; - -import static org.junit.Assert.assertEquals; - -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.thrift.TException; - -import junit.framework.TestCase; - -public class TestStorageHandlerProperties extends TestCase { - - private Driver hcatDriver; - private Driver hiveDriver; - private HiveMetaStoreClient msc; - - protected void setUp() throws Exception { - HiveConf hcatConf = new HiveConf(this.getClass()); - hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - - HiveConf hiveConf = new HiveConf(hcatConf,this.getClass()); - hiveDriver = new Driver(hiveConf); - - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatDriver = new Driver(hcatConf); - - msc = new HiveMetaStoreClient(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - } - - public void testTableProperties() throws CommandNeedRetryException, MetaException ,TException, NoSuchObjectException{ - hcatDriver.run("drop table test_table"); - CommandProcessorResponse response = hcatDriver - .run("create table test_table(key int, value string) STORED BY " + - "'org.apache.hcatalog.cli.DummyStorageHandler' "); - - assertEquals(0, response.getResponseCode()); - Table tbl = msc.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "test_table"); - DummyStorageHandler dsh = new DummyStorageHandler(); - assertTrue(tbl.getParameters().containsKey(HCatConstants.HCAT_ISD_CLASS)); - assertTrue(tbl.getParameters().containsKey(HCatConstants.HCAT_OSD_CLASS)); - assertEquals(tbl.getParameters().get(HCatConstants.HCAT_ISD_CLASS), dsh.getInputStorageDriver().getName()); - assertEquals(tbl.getParameters().get(HCatConstants.HCAT_OSD_CLASS), dsh.getOutputStorageDriver().getName()); - } - - /* @throws java.lang.Exception - * @see junit.framework.TestCase#tearDown() - */ - protected void tearDown() throws Exception { - super.tearDown(); - } - -} Index: hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapred/TestHiveHCatInputFormat.java.broken =================================================================== --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapred/TestHiveHCatInputFormat.java.broken (revision 1673556) +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapred/TestHiveHCatInputFormat.java.broken (working copy) @@ -1,193 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapred; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Properties; - -import junit.framework.TestCase; - -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hcatalog.MiniCluster; -import org.apache.hcatalog.data.HCatDataCheckUtil; -import org.apache.hcatalog.mapred.HCatMapredInputFormat; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.storagehandler.HCatStorageHandlerImpl; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.impl.util.UDFContext; - -public class TestHiveHCatInputFormat extends TestCase { - private static MiniCluster cluster = MiniCluster.buildCluster(); - private static Driver driver; - - String PTNED_TABLE = "junit_testhiveinputintegration_ptni"; - String UNPTNED_TABLE = "junit_testhiveinputintegration_noptn"; - String basicFile = "/tmp/"+PTNED_TABLE+".file"; - - public void testFromHive() throws Exception { - if (driver == null){ - driver = HCatDataCheckUtil.instantiateDriver(cluster); - } - - Properties props = new Properties(); - props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); - String basicFileFullName = cluster.getProperties().getProperty("fs.default.name") + basicFile; - - cleanup(); - - // create source data file - HCatDataCheckUtil.generateDataFile(cluster,basicFile); - - String createPtnedTable = "(j int, s string) partitioned by (i int) " - +"stored by '"+HCatStorageHandlerImpl.class.getName()+"' tblproperties" - + "('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," - + "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') "; - - HCatDataCheckUtil.createTable(driver,PTNED_TABLE,createPtnedTable); - - String createUnptnedTable = "(i int, j int, s string) " - +"stored by '"+HCatStorageHandlerImpl.class.getName()+"' tblproperties" - + "('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," - + "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') "; - - HCatDataCheckUtil.createTable(driver,UNPTNED_TABLE,createUnptnedTable); - - - driver.run("describe extended "+UNPTNED_TABLE); - ArrayList des_values = new ArrayList(); - driver.getResults(des_values); - for (String s : des_values){ - System.err.println("du:"+s); - } - - driver.run("describe extended "+PTNED_TABLE); - ArrayList des2_values = new ArrayList(); - driver.getResults(des2_values); - for (String s : des2_values){ - System.err.println("dp:"+s); - } - - // use pig to read from source file and put into this table - - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server.registerQuery("A = load 
'"+basicFileFullName+"' as (i:int, j:int, s:chararray);"); - server.registerQuery("store A into '"+UNPTNED_TABLE+"' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); - - server.setBatchOn(); - server.registerQuery("A = load '"+basicFileFullName+"' as (i:int, j:int, s:chararray);"); - server.registerQuery("store A into '"+PTNED_TABLE+"' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); - - // partitioned by i - // select * from tbl; - // select j,s,i from tbl; - // select * from tbl where i = 3; - // select j,s,i from tbl where i = 3; - // select * from tbl where j = 3; - // select j,s,i from tbl where j = 3; - - ArrayList p_select_star_nofilter = HCatDataCheckUtil.formattedRun(driver, - "p_select_star_nofilter","select * from "+PTNED_TABLE); - ArrayList p_select_named_nofilter = HCatDataCheckUtil.formattedRun(driver, - "p_select_named_nofilter","select j,s,i from "+PTNED_TABLE); - - assertDataIdentical(p_select_star_nofilter,p_select_named_nofilter,50); - - ArrayList p_select_star_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "p_select_star_ptnfilter","select * from "+PTNED_TABLE+" where i = 3"); - ArrayList p_select_named_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "p_select_named_ptnfilter","select j,s,i from "+PTNED_TABLE+" where i = 3"); - - assertDataIdentical(p_select_star_ptnfilter,p_select_named_ptnfilter,10); - - ArrayList select_star_nonptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_star_nonptnfilter","select * from "+PTNED_TABLE+" where j = 28"); - ArrayList select_named_nonptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_named_nonptnfilter","select j,s,i from "+PTNED_TABLE+" where j = 28"); - - assertDataIdentical(select_star_nonptnfilter,select_named_nonptnfilter,1); - - // non-partitioned - // select * from tbl; - // select i,j,s from tbl; - // select * from tbl where i = 3; - // select i,j,s from tbl where i = 3; - - // select j,s,i from tbl; - // select j,s,i from tbl where i = 3; - - ArrayList select_star_nofilter = HCatDataCheckUtil.formattedRun(driver, - "select_star_nofilter","select * from "+UNPTNED_TABLE); //i,j,s select * order is diff for unptn - ArrayList select_ijs_nofilter = HCatDataCheckUtil.formattedRun(driver, - "select_ijs_nofilter","select i,j,s from "+UNPTNED_TABLE); - - assertDataIdentical(select_star_nofilter,select_ijs_nofilter,50); - - ArrayList select_star_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_star_ptnfilter","select * from "+UNPTNED_TABLE+" where i = 3"); //i,j,s - ArrayList select_ijs_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_ijs_ptnfilter","select i,j,s from "+UNPTNED_TABLE+" where i = 3"); - - assertDataIdentical(select_star_ptnfilter,select_ijs_ptnfilter,10); - - ArrayList select_jsi_nofilter = HCatDataCheckUtil.formattedRun(driver, - "select_jsi_nofilter","select j,s,i from "+UNPTNED_TABLE); - assertDataIdentical(p_select_named_nofilter,select_jsi_nofilter,50,true); - - ArrayList select_jsi_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_jsi_ptnfilter","select j,s,i from "+UNPTNED_TABLE+" where i = 3"); - assertDataIdentical(p_select_named_ptnfilter,select_jsi_ptnfilter,10,true); - - } - - private void assertDataIdentical(ArrayList result1, - ArrayList result2, int numRecords) { - assertDataIdentical(result1,result2,numRecords,false); - } - - private void assertDataIdentical(ArrayList result1, - ArrayList result2, int numRecords,boolean doSort) { - assertEquals(numRecords, result1.size()); - 
assertEquals(numRecords, result2.size()); - Collections.sort(result1); - Collections.sort(result2); - for (int i = 0; i < numRecords; i++){ - assertEquals(result1.get(i),result2.get(i)); - } - } - - - private void cleanup() throws IOException, CommandNeedRetryException { - MiniCluster.deleteFile(cluster, basicFile); - HCatDataCheckUtil.dropTable(driver,PTNED_TABLE); - HCatDataCheckUtil.dropTable(driver,UNPTNED_TABLE); - } - -} Index: hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java =================================================================== --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java (revision 1673556) +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java (working copy) @@ -24,7 +24,9 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.WindowsPathUtil; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.util.Shell; import org.apache.hive.hcatalog.common.HCatUtil; import org.apache.pig.ExecType; import org.apache.pig.PigServer; @@ -81,6 +83,10 @@ hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, ""); hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR); + + if (Shell.WINDOWS) { + WindowsPathUtil.convertPathsFromWindowsToHdfs(hiveConf); + } } protected void logAndRegister(PigServer server, String query) throws IOException { Index: hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken =================================================================== --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken (revision 1673556) +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken (working copy) @@ -1,429 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.apache.hcatalog.mapreduce.TestHCatEximInputFormat.TestImport.EmpDetails; - -/** - * - * TestHCatEximInputFormat. tests primarily HCatEximInputFormat but - * also HCatEximOutputFormat. - * - */ -public class TestHCatEximInputFormat extends TestCase { - - public static class TestExport extends - org.apache.hadoop.mapreduce.Mapper { - - private HCatSchema recordSchema; - - @Override - protected void setup(Context context) throws IOException, - InterruptedException { - super.setup(context); - recordSchema = HCatEximOutputFormat.getTableSchema(context); - } - - @Override - public void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - HCatRecord record = new DefaultHCatRecord(recordSchema.size()); - record.setInteger("emp_id", recordSchema, Integer.parseInt(cols[0])); - record.setString("emp_name", recordSchema, cols[1]); - record.setString("emp_dob", recordSchema, cols[2]); - record.setString("emp_sex", recordSchema, cols[3]); - context.write(key, record); - } - } - - public static class TestImport extends - org.apache.hadoop.mapreduce.Mapper< - org.apache.hadoop.io.LongWritable, HCatRecord, - org.apache.hadoop.io.Text, - org.apache.hadoop.io.Text> { - - private HCatSchema recordSchema; - - public static class EmpDetails { - public String emp_name; - public String emp_dob; - public String emp_sex; - public String emp_country; - public String emp_state; - } - - public static Map empRecords = new TreeMap(); - - @Override - protected void setup(Context context) throws IOException, - InterruptedException { - super.setup(context); - try { - recordSchema = HCatBaseInputFormat.getOutputSchema(context); - } catch (Exception e) { - throw new IOException("Error getting outputschema from job configuration", e); - } - System.out.println("RecordSchema : " + recordSchema.toString()); - } - - @Override - public void map(LongWritable key, HCatRecord value, Context context) - throws IOException, InterruptedException { - EmpDetails empDetails = new EmpDetails(); - Integer emp_id = value.getInteger("emp_id", recordSchema); - String emp_name = value.getString("emp_name", recordSchema); - empDetails.emp_name = emp_name; - if (recordSchema.getPosition("emp_dob") != null) { - empDetails.emp_dob = value.getString("emp_dob", recordSchema); - } - if (recordSchema.getPosition("emp_sex") != null) { - empDetails.emp_sex = value.getString("emp_sex", recordSchema); - } - if 
(recordSchema.getPosition("emp_country") != null) { - empDetails.emp_country = value.getString("emp_country", recordSchema); - } - if (recordSchema.getPosition("emp_state") != null) { - empDetails.emp_state = value.getString("emp_state", recordSchema); - } - empRecords.put(emp_id, empDetails); - } - } - - private static final String dbName = "hcatEximOutputFormatTestDB"; - private static final String tblName = "hcatEximOutputFormatTestTable"; - Configuration conf; - Job job; - List columns; - HCatSchema schema; - FileSystem fs; - Path inputLocation; - Path outputLocation; - private HCatSchema partSchema; - - - @Override - protected void setUp() throws Exception { - System.out.println("Setup started"); - super.setUp(); - conf = new Configuration(); - job = new Job(conf, "test eximinputformat"); - columns = new ArrayList(); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", - Constants.STRING_TYPE_NAME, ""))); - schema = new HCatSchema(columns); - - fs = new LocalFileSystem(); - fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); - inputLocation = new Path(fs.getWorkingDirectory(), "tmp/exports"); - outputLocation = new Path(fs.getWorkingDirectory(), "tmp/data"); - - job.setJarByClass(this.getClass()); - job.setNumReduceTasks(0); - System.out.println("Setup done"); - } - - private void setupMRExport(String[] records) throws IOException { - if (fs.exists(outputLocation)) { - fs.delete(outputLocation, true); - } - FSDataOutputStream ds = fs.create(outputLocation, true); - for (String record : records) { - ds.writeBytes(record); - } - ds.close(); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(HCatEximOutputFormat.class); - TextInputFormat.setInputPaths(job, outputLocation); - job.setMapperClass(TestExport.class); - } - - private void setupMRImport() throws IOException { - if (fs.exists(outputLocation)) { - fs.delete(outputLocation, true); - } - job.setInputFormatClass(HCatEximInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputLocation); - job.setMapperClass(TestImport.class); - TestImport.empRecords.clear(); - } - - - @Override - protected void tearDown() throws Exception { - System.out.println("Teardown started"); - super.tearDown(); - // fs.delete(inputLocation, true); - // fs.delete(outputLocation, true); - System.out.println("Teardown done"); - } - - - private void runNonPartExport() throws IOException, InterruptedException, ClassNotFoundException { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - setupMRExport(new String[] { - "237,Krishna,01/01/1990,M,IN,TN\n", - "238,Kalpana,01/01/2000,F,IN,KA\n", - "239,Satya,01/01/2001,M,US,TN\n", - "240,Kavya,01/01/2002,F,US,KA\n" - - }); - HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - inputLocation.toString(), - null, - null, - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - } - - private void runPartExport(String record, String country, String state) throws IOException, InterruptedException, ClassNotFoundException { - setupMRExport(new String[] 
{record}); - List partValues = new ArrayList(2); - partValues.add(country); - partValues.add(state); - HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - inputLocation.toString(), - partSchema , - partValues , - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - } - - public void testNonPart() throws Exception { - try { - runNonPartExport(); - setUp(); - setupMRImport(); - HCatEximInputFormat.setInput(job, "tmp/exports", null); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", null, null); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", null, null); - assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", null, null); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", null, null); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - public void testNonPartProjection() throws Exception { - try { - - runNonPartExport(); - setUp(); - setupMRImport(); - HCatEximInputFormat.setInput(job, "tmp/exports", null); - - List readColumns = new ArrayList(); - readColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - readColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - - HCatEximInputFormat.setOutputSchema(job, new HCatSchema(readColumns)); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", null, null, null, null); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", null, null, null, null); - assertEmpDetail(TestImport.empRecords.get(239), "Satya", null, null, null, null); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", null, null, null, null); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - public void testPart() throws Exception { - try { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - - List partKeys = new ArrayList(2); - partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); - partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); - partSchema = new HCatSchema(partKeys); - - runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); - setUp(); - runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); - setUp(); - runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); - setUp(); - runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); - - setUp(); - setupMRImport(); - HCatEximInputFormat.setInput(job, "tmp/exports", null); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", "in", "tn"); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", "ka"); - assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", "us", "tn"); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - public void 
testPartWithPartCols() throws Exception { - try { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - - List partKeys = new ArrayList(2); - partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); - partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); - partSchema = new HCatSchema(partKeys); - - runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); - setUp(); - runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); - setUp(); - runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); - setUp(); - runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); - - setUp(); - setupMRImport(); - HCatEximInputFormat.setInput(job, "tmp/exports", null); - - List colsPlusPartKeys = new ArrayList(); - colsPlusPartKeys.addAll(columns); - colsPlusPartKeys.addAll(partKeys); - - HCatBaseInputFormat.setOutputSchema(job, new HCatSchema(colsPlusPartKeys)); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", "in", "tn"); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", "ka"); - assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", "us", "tn"); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - - public void testPartSelection() throws Exception { - try { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - - List partKeys = new ArrayList(2); - partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); - partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); - partSchema = new HCatSchema(partKeys); - - runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); - setUp(); - runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); - setUp(); - runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); - setUp(); - runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); - - setUp(); - setupMRImport(); - Map filter = new TreeMap(); - filter.put("emp_state", "ka"); - HCatEximInputFormat.setInput(job, "tmp/exports", filter); - job.waitForCompletion(true); - - assertEquals(2, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", "ka"); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - - private void assertEmpDetail(EmpDetails empDetails, String name, String dob, String mf, String country, String state) { - assertNotNull(empDetails); - assertEquals(name, empDetails.emp_name); - assertEquals(dob, empDetails.emp_dob); - assertEquals(mf, empDetails.emp_sex); - assertEquals(country, empDetails.emp_country); - assertEquals(state, empDetails.emp_state); - } - -} Index: hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken =================================================================== --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken (revision 1673556) +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken (working copy) @@ -1,261 
+0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.parse.EximUtil; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; - -/** - * - * TestHCatEximOutputFormat. Some basic testing here. More testing done via - * TestHCatEximInputFormat - * - */ -public class TestHCatEximOutputFormat extends TestCase { - - public static class TestMap extends - Mapper { - - private HCatSchema recordSchema; - - @Override - protected void setup(Context context) throws IOException, - InterruptedException { - super.setup(context); - recordSchema = HCatEximOutputFormat.getTableSchema(context); - System.out.println("TestMap/setup called"); - } - - @Override - public void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - HCatRecord record = new DefaultHCatRecord(recordSchema.size()); - System.out.println("TestMap/map called. Cols[0]:" + cols[0]); - System.out.println("TestMap/map called. Cols[1]:" + cols[1]); - System.out.println("TestMap/map called. Cols[2]:" + cols[2]); - System.out.println("TestMap/map called. 
Cols[3]:" + cols[3]); - record.setInteger("emp_id", recordSchema, Integer.parseInt(cols[0])); - record.setString("emp_name", recordSchema, cols[1]); - record.setString("emp_dob", recordSchema, cols[2]); - record.setString("emp_sex", recordSchema, cols[3]); - context.write(key, record); - } - } - - - private static final String dbName = "hcatEximOutputFormatTestDB"; - private static final String tblName = "hcatEximOutputFormatTestTable"; - Configuration conf; - Job job; - List columns; - HCatSchema schema; - FileSystem fs; - Path outputLocation; - Path dataLocation; - - public void testNonPart() throws Exception { - try { - HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - outputLocation.toString(), - null, - null, - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - - Path metadataPath = new Path(outputLocation, "_metadata"); - Map.Entry> rv = EximUtil.readMetaData(fs, metadataPath); - Table table = rv.getKey(); - List partitions = rv.getValue(); - - assertEquals(dbName, table.getDbName()); - assertEquals(tblName, table.getTableName()); - assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), - HCatUtil.getFieldSchemaList(columns))); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", - table.getSd().getInputFormat()); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - table.getSd().getOutputFormat()); - assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - table.getSd().getSerdeInfo().getSerializationLib()); - assertEquals(0, table.getPartitionKeys().size()); - - assertEquals(0, partitions.size()); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - - } - - public void testPart() throws Exception { - try { - List partKeys = new ArrayList(); - partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_country", - Constants.STRING_TYPE_NAME, ""))); - partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_state", - Constants.STRING_TYPE_NAME, ""))); - HCatSchema partitionSchema = new HCatSchema(partKeys); - - List partitionVals = new ArrayList(); - partitionVals.add("IN"); - partitionVals.add("TN"); - - HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - outputLocation.toString(), - partitionSchema, - partitionVals, - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - Path metadataPath = new Path(outputLocation, "_metadata"); - Map.Entry> rv = EximUtil.readMetaData(fs, metadataPath); - Table table = rv.getKey(); - List partitions = rv.getValue(); - - assertEquals(dbName, table.getDbName()); - assertEquals(tblName, table.getTableName()); - assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), - HCatUtil.getFieldSchemaList(columns))); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", - table.getSd().getInputFormat()); - 
assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - table.getSd().getOutputFormat()); - assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - table.getSd().getSerdeInfo().getSerializationLib()); - assertEquals(2, table.getPartitionKeys().size()); - List partSchema = table.getPartitionKeys(); - assertEquals("emp_country", partSchema.get(0).getName()); - assertEquals("emp_state", partSchema.get(1).getName()); - - assertEquals(1, partitions.size()); - Partition partition = partitions.get(0); - assertEquals("IN", partition.getValues().get(0)); - assertEquals("TN", partition.getValues().get(1)); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - @Override - protected void setUp() throws Exception { - System.out.println("Setup started"); - super.setUp(); - conf = new Configuration(); - job = new Job(conf, "test eximoutputformat"); - columns = new ArrayList(); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", - Constants.STRING_TYPE_NAME, ""))); - schema = new HCatSchema(columns); - - fs = new LocalFileSystem(); - fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); - outputLocation = new Path(fs.getWorkingDirectory(), "tmp/exports"); - if (fs.exists(outputLocation)) { - fs.delete(outputLocation, true); - } - dataLocation = new Path(fs.getWorkingDirectory(), "tmp/data"); - if (fs.exists(dataLocation)) { - fs.delete(dataLocation, true); - } - FSDataOutputStream ds = fs.create(dataLocation, true); - ds.writeBytes("237,Krishna,01/01/1990,M,IN,TN\n"); - ds.writeBytes("238,Kalpana,01/01/2000,F,IN,KA\n"); - ds.writeBytes("239,Satya,01/01/2001,M,US,TN\n"); - ds.writeBytes("240,Kavya,01/01/2002,F,US,KA\n"); - ds.close(); - - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(HCatEximOutputFormat.class); - TextInputFormat.setInputPaths(job, dataLocation); - job.setJarByClass(this.getClass()); - job.setMapperClass(TestMap.class); - job.setNumReduceTasks(0); - System.out.println("Setup done"); - } - - @Override - protected void tearDown() throws Exception { - System.out.println("Teardown started"); - super.tearDown(); - fs.delete(dataLocation, true); - fs.delete(outputLocation, true); - System.out.println("Teardown done"); - } -} Index: hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken =================================================================== --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken (revision 1673556) +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken (working copy) @@ -1,294 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.rcfile; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.*; - -import junit.framework.Assert; -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.io.RCFile; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; -import org.apache.hadoop.io.compress.DefaultCodec; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatDataCheckUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.rcfile.RCFileInputDriver; -import org.apache.hadoop.hive.shims.ShimLoader; - - -public class TestRCFileInputStorageDriver extends TestCase{ - private static final Configuration conf = new Configuration(); - private static final Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred"); - private static final Path file = new Path(dir, "test_rcfile"); - private final HCatHadoopShims shim = ShimLoader.getHadoopShims().getHCatShim(); - - // Generate sample records to compare against - private byte[][][] getRecords() throws UnsupportedEncodingException { - byte[][] record_1 = {"123".getBytes("UTF-8"), "456".getBytes("UTF-8"), - "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), - "5.3".getBytes("UTF-8"), "hcatalog and hadoop".getBytes("UTF-8"), - new byte[0], "\\N".getBytes("UTF-8")}; - byte[][] record_2 = {"100".getBytes("UTF-8"), "200".getBytes("UTF-8"), - "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"), - "5.3".getBytes("UTF-8"), "hcatalog and hadoop".getBytes("UTF-8"), - new byte[0], "\\N".getBytes("UTF-8")}; - return new byte[][][]{record_1, record_2}; - } - - // Write sample records to file for individual tests - private BytesRefArrayWritable[] initTestEnvironment() throws IOException { - FileSystem fs = FileSystem.getLocal(conf); - fs.delete(file, true); - - byte [][][] records = getRecords(); - RCFileOutputFormat.setColumnNumber(conf, 8); - 
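// The writer created below can be verified independently of the input driver. A minimal
// read-back sketch (illustrative names; assumes the Reader half of the same
// org.apache.hadoop.hive.ql.io.RCFile class, as exercised by Hive's own RCFile tests):
RCFile.Reader rcReader = new RCFile.Reader(fs, file, conf);
LongWritable rowId = new LongWritable();
BytesRefArrayWritable row = new BytesRefArrayWritable();
while (rcReader.next(rowId)) {   // advance to the next stored row
  rcReader.getCurrentRow(row);   // fill 'row' with that row's serialized column bytes
}
rcReader.close();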
RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec()); - - BytesRefArrayWritable bytes = writeBytesToFile(records[0], writer); - BytesRefArrayWritable bytes2 = writeBytesToFile(records[1], writer); - - writer.close(); - return new BytesRefArrayWritable[]{bytes,bytes2}; - } - - private BytesRefArrayWritable writeBytesToFile(byte[][] record, RCFile.Writer writer) throws IOException { - BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length); - for (int i = 0; i < record.length; i++) { - BytesRefWritable cu = new BytesRefWritable(record[i], 0, record[i].length); - bytes.set(i, cu); - } - writer.append(bytes); - return bytes; - } - - public void testConvertValueToTuple() throws IOException,InterruptedException{ - BytesRefArrayWritable[] bytesArr = initTestEnvironment(); - - HCatSchema schema = buildHiveSchema(); - RCFileInputDriver sd = new RCFileInputDriver(); - JobContext jc = shim.createJobContext(conf, new JobID()); - sd.setInputPath(jc, file.toString()); - InputFormat iF = sd.getInputFormat(null); - InputSplit split = iF.getSplits(jc).get(0); - sd.setOriginalSchema(jc, schema); - sd.setOutputSchema(jc, schema); - sd.initialize(jc, getProps()); - - TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); - RecordReader rr = iF.createRecordReader(split,tac); - rr.initialize(split, tac); - HCatRecord[] tuples = getExpectedRecords(); - for(int j=0; j < 2; j++){ - Assert.assertTrue(rr.nextKeyValue()); - BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); - Assert.assertEquals(bytesArr[j], w); - HCatRecord t = sd.convertToHCatRecord(null,w); - Assert.assertEquals(8, t.size()); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); - } - } - - public void testPruning() throws IOException,InterruptedException{ - BytesRefArrayWritable[] bytesArr = initTestEnvironment(); - - RCFileInputDriver sd = new RCFileInputDriver(); - JobContext jc = shim.createJobContext(conf, new JobID()); - sd.setInputPath(jc, file.toString()); - InputFormat iF = sd.getInputFormat(null); - InputSplit split = iF.getSplits(jc).get(0); - sd.setOriginalSchema(jc, buildHiveSchema()); - sd.setOutputSchema(jc, buildPrunedSchema()); - - sd.initialize(jc, getProps()); - conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,jc.getConfiguration().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); - TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); - RecordReader rr = iF.createRecordReader(split,tac); - rr.initialize(split, tac); - HCatRecord[] tuples = getPrunedRecords(); - for(int j=0; j < 2; j++){ - Assert.assertTrue(rr.nextKeyValue()); - BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); - Assert.assertFalse(bytesArr[j].equals(w)); - Assert.assertEquals(w.size(), 8); - HCatRecord t = sd.convertToHCatRecord(null,w); - Assert.assertEquals(5, t.size()); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); - } - assertFalse(rr.nextKeyValue()); - } - - public void testReorderdCols() throws IOException,InterruptedException{ - BytesRefArrayWritable[] bytesArr = initTestEnvironment(); - - RCFileInputDriver sd = new RCFileInputDriver(); - JobContext jc = shim.createJobContext(conf, new JobID()); - sd.setInputPath(jc, file.toString()); - InputFormat iF = sd.getInputFormat(null); - InputSplit split = iF.getSplits(jc).get(0); - sd.setOriginalSchema(jc, buildHiveSchema()); - sd.setOutputSchema(jc, buildReorderedSchema()); - - sd.initialize(jc, getProps()); - Map map = new 
HashMap(1); - map.put("part1", "first-part"); - sd.setPartitionValues(jc, map); - conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,jc.getConfiguration().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); - TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); - RecordReader rr = iF.createRecordReader(split,tac); - rr.initialize(split, tac); - HCatRecord[] tuples = getReorderedCols(); - for(int j=0; j < 2; j++){ - Assert.assertTrue(rr.nextKeyValue()); - BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); - Assert.assertFalse(bytesArr[j].equals(w)); - Assert.assertEquals(w.size(), 8); - HCatRecord t = sd.convertToHCatRecord(null,w); - Assert.assertEquals(7, t.size()); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); - } - assertFalse(rr.nextKeyValue()); - } - private HCatRecord[] getExpectedRecords(){ - List rec_1 = new ArrayList(8); - Collections.addAll(rec_1, new Byte("123"), - new Short("456"), - new Integer(789), - new Long(1000L), - new Double(5.3D), - new String("hcatalog and hadoop"), - null, - null); - - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = new ArrayList(8); - Collections.addAll(rec_2, new Byte("100"), - new Short("200"), - new Integer(123), - new Long(1000L), - new Double(5.3D), - new String("hcatalog and hadoop"), - null, - null); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - return new HCatRecord[]{tup_1,tup_2}; - } - - private HCatRecord[] getPrunedRecords(){ - List rec_1 = new ArrayList(8); - Collections.addAll(rec_1, new Byte("123"), - new Integer(789), - new Double(5.3D), - new String("hcatalog and hadoop"), - null); - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = new ArrayList(8); - Collections.addAll(rec_2, new Byte("100"), - new Integer(123), - new Double(5.3D), - new String("hcatalog and hadoop"), - null); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - return new HCatRecord[]{tup_1,tup_2}; - } - - private HCatSchema buildHiveSchema() throws HCatException{ - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("atinyint", "tinyint", ""), - new FieldSchema("asmallint", "smallint", ""), - new FieldSchema("aint", "int", ""), - new FieldSchema("along", "bigint", ""), - new FieldSchema("adouble", "double", ""), - new FieldSchema("astring", "string", ""), - new FieldSchema("anullint", "int", ""), - new FieldSchema("anullstring", "string", ""))); - } - - private HCatSchema buildPrunedSchema() throws HCatException{ - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("atinyint", "tinyint", ""), - new FieldSchema("aint", "int", ""), - new FieldSchema("adouble", "double", ""), - new FieldSchema("astring", "string", ""), - new FieldSchema("anullint", "int", ""))); - } - - private HCatSchema buildReorderedSchema() throws HCatException{ - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("aint", "int", ""), - new FieldSchema("part1", "string", ""), - new FieldSchema("adouble", "double", ""), - new FieldSchema("newCol", "tinyint", ""), - new FieldSchema("astring", "string", ""), - new FieldSchema("atinyint", "tinyint", ""), - new FieldSchema("anullint", "int", ""))); - } - - private HCatRecord[] getReorderedCols(){ - List rec_1 = new ArrayList(7); - Collections.addAll(rec_1, new Integer(789), - new String("first-part"), - new Double(5.3D), - null, // new column - new String("hcatalog and hadoop"), - new Byte("123"), - null); - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = 
new ArrayList(7); - Collections.addAll(rec_2, new Integer(123), - new String("first-part"), - new Double(5.3D), - null, - new String("hcatalog and hadoop"), - new Byte("100"), - null); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - return new HCatRecord[]{tup_1,tup_2}; - - } - private Properties getProps(){ - Properties props = new Properties(); - props.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); - props.setProperty(Constants.SERIALIZATION_FORMAT, "9"); - return props; - } -} Index: hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken =================================================================== --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken (revision 1673556) +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken (working copy) @@ -1,105 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.rcfile; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputStorageDriver; -import org.apache.hcatalog.mapreduce.HCatOutputStorageDriver; -import org.apache.hcatalog.mapreduce.OutputJobInfo; -import org.apache.hadoop.hive.shims.ShimLoader; - -public class TestRCFileOutputStorageDriver extends TestCase { - - public void testConversion() throws IOException { - Configuration conf = new Configuration(); - JobContext jc = ShimLoader.getHadoopShims().getHCatShim().createJobContext(conf, new JobID()); - String jobString = HCatUtil.serialize(OutputJobInfo.create(null,null,null)); - jc.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_INFO,jobString); - - HCatSchema schema = buildHiveSchema(); - HCatInputStorageDriver isd = new RCFileInputDriver(); - - isd.setOriginalSchema(jc, schema); - isd.setOutputSchema(jc, schema); - isd.initialize(jc, new Properties()); - - byte[][] byteArray = buildBytesArray(); - - BytesRefArrayWritable bytesWritable = new 
BytesRefArrayWritable(byteArray.length); - for (int i = 0; i < byteArray.length; i++) { - BytesRefWritable cu = new BytesRefWritable(byteArray[i], 0, byteArray[i].length); - bytesWritable.set(i, cu); - } - - //Convert byte array to HCatRecord using isd, convert hcatrecord back to byte array - //using osd, compare the two arrays - HCatRecord record = isd.convertToHCatRecord(null, bytesWritable); - - HCatOutputStorageDriver osd = new RCFileOutputDriver(); - - osd.setSchema(jc, schema); - osd.initialize(jc, new Properties()); - - BytesRefArrayWritable bytesWritableOutput = (BytesRefArrayWritable) osd.convertValue(record); - - assertTrue(bytesWritableOutput.compareTo(bytesWritable) == 0); - } - - private byte[][] buildBytesArray() throws UnsupportedEncodingException { - byte[][] bytes = {"123".getBytes("UTF-8"), "456".getBytes("UTF-8"), - "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), - "5.3".getBytes("UTF-8"), "hcat and hadoop".getBytes("UTF-8"), - new byte[0], "\\N".getBytes("UTF-8") }; - return bytes; - } - - private HCatSchema buildHiveSchema() throws HCatException{ - - List fields = new ArrayList(8); - fields.add(new FieldSchema("atinyint", "tinyint", "")); - fields.add(new FieldSchema("asmallint", "smallint", "")); - fields.add(new FieldSchema("aint", "int", "")); - fields.add(new FieldSchema("along", "bigint", "")); - fields.add(new FieldSchema("adouble", "double", "")); - fields.add(new FieldSchema("astring", "string", "")); - fields.add(new FieldSchema("anullint", "int", "")); - fields.add(new FieldSchema("anullstring", "string", "")); - - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(fields)); - } -} Index: hcatalog/hcatalog-pig-adapter/pom.xml =================================================================== --- hcatalog/hcatalog-pig-adapter/pom.xml (revision 1673556) +++ hcatalog/hcatalog-pig-adapter/pom.xml (working copy) @@ -68,7 +68,6 @@ - hadoop-1 @@ -79,6 +78,12 @@ ${hadoop-20S.version} + org.apache.hadoop + hadoop-test + ${hadoop-20S.version} + test + + org.apache.pig pig ${pig.version} @@ -102,6 +107,11 @@ org.apache.hadoop + hadoop-mapreduce-client-jobclient + ${hadoop-23.version} + + + org.apache.hadoop hadoop-mapreduce-client-core ${hadoop-23.version} @@ -112,6 +122,12 @@ h2 + org.apache.hadoop + hadoop-hdfs + ${hadoop-23.version} + test + + joda-time @@ -121,11 +137,30 @@ org.apache.hadoop + hadoop-hdfs + ${hadoop-23.version} + tests + test + + + org.apache.hadoop hadoop-mapreduce-client-common ${hadoop-23.version} true test + + org.apache.hadoop + hadoop-common + ${hadoop-23.version} + tests + test + + + com.sun.jersey + jersey-servlet + test + Index: hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximLoader.java.broken =================================================================== --- hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximLoader.java.broken (revision 1673556) +++ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximLoader.java.broken (working copy) @@ -1,129 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatBaseInputFormat; -import org.apache.hcatalog.mapreduce.HCatEximInputFormat; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.Job; -import org.apache.pig.Expression; -import org.apache.pig.LoadFunc; -import org.apache.pig.ResourceSchema; -import org.apache.pig.impl.util.UDFContext; - -/** - * Pig {@link LoadFunc} to read data/metadata from hcatalog exported location - */ - -public class HCatEximLoader extends HCatBaseLoader { - - private static final Log LOG = LogFactory.getLog(HCatEximLoader.class); - - private HCatSchema tableSchema; - private HCatSchema partitionSchema; - private HCatEximInputFormat inputFormat; - - public HCatEximLoader() { - LOG.debug("HCatEximLoader ctored"); - } - - @Override - public ResourceSchema getSchema(String location, Job job) throws IOException { - LOG.debug("getSchema with location :" + location); - if (tableSchema == null) { - List rv = HCatEximInputFormat.setInput(job, location, null); - tableSchema = rv.get(0); - partitionSchema = rv.get(1); - } - LOG.debug("getSchema got schema :" + tableSchema.toString()); - List colsPlusPartKeys = new ArrayList(); - colsPlusPartKeys.addAll(tableSchema.getFields()); - colsPlusPartKeys.addAll(partitionSchema.getFields()); - outputSchema = new HCatSchema(colsPlusPartKeys); - return PigHCatUtil.getResourceSchema(outputSchema); - } - - @Override - public String[] getPartitionKeys(String location, Job job) throws IOException { - LOG.warn("getPartitionKeys with location :" + location); - /* - if (tableSchema == null) { - List rv = HCatEximInputFormat.setInput(job, location, null); - tableSchema = rv.get(0); - partitionSchema = rv.get(1); - } - return partitionSchema.getFieldNames().toArray(new String[0]); - */ - return null; - } - - @Override - public void setPartitionFilter(Expression partitionFilter) throws IOException { - LOG.debug("setPartitionFilter with filter :" + partitionFilter.toString()); - } - - @Override - public void setLocation(String location, Job job) throws IOException { - LOG.debug("setLocation with location :" + location); - List rv = HCatEximInputFormat.setInput(job, location, null); - tableSchema = rv.get(0); - partitionSchema = rv.get(1); - List colsPlusPartKeys = new ArrayList(); - colsPlusPartKeys.addAll(tableSchema.getFields()); - colsPlusPartKeys.addAll(partitionSchema.getFields()); - outputSchema = new HCatSchema(colsPlusPartKeys); - UDFContext udfContext = UDFContext.getUDFContext(); - Properties props = udfContext.getUDFProperties(this.getClass(), - new String[] {signature}); - RequiredFieldList requiredFieldsInfo = - (RequiredFieldList) props.get(PRUNE_PROJECTION_INFO); - if (requiredFieldsInfo != null) { - ArrayList fcols = new ArrayList(); - for 
(RequiredField rf : requiredFieldsInfo.getFields()) { - fcols.add(tableSchema.getFields().get(rf.getIndex())); - } - outputSchema = new HCatSchema(fcols); - try { - HCatBaseInputFormat.setOutputSchema(job, outputSchema); - } catch (Exception e) { - throw new IOException(e); - } - } - } - - - @Override - public InputFormat getInputFormat() throws IOException { - if (inputFormat == null) { - inputFormat = new HCatEximInputFormat(); - } - return inputFormat; - } - -} Index: hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximStorer.java.broken =================================================================== --- hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximStorer.java.broken (revision 1673556) +++ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximStorer.java.broken (working copy) @@ -1,152 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatEximOutputCommitter; -import org.apache.hcatalog.mapreduce.HCatEximOutputFormat; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.pig.ResourceSchema; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.impl.util.ObjectSerializer; -import org.apache.pig.impl.util.UDFContext; - -/** - * HCatEximStorer. 
- * - */ - -public class HCatEximStorer extends HCatBaseStorer { - - private static final Log LOG = LogFactory.getLog(HCatEximStorer.class); - - private final String outputLocation; - - public HCatEximStorer(String outputLocation) throws Exception { - this(outputLocation, null, null); - } - - public HCatEximStorer(String outputLocation, String partitionSpec) throws Exception { - this(outputLocation, partitionSpec, null); - } - - public HCatEximStorer(String outputLocation, String partitionSpec, String schema) - throws Exception { - super(partitionSpec, schema); - this.outputLocation = outputLocation; - LOG.debug("HCatEximStorer called"); - } - - @Override - public OutputFormat getOutputFormat() throws IOException { - LOG.debug("getOutputFormat called"); - return new HCatEximOutputFormat(); - } - - @Override - public void setStoreLocation(String location, Job job) throws IOException { - LOG.debug("setStoreLocation called with :" + location); - String[] userStr = location.split("\\."); - String dbname = MetaStoreUtils.DEFAULT_DATABASE_NAME; - String tablename = null; - if (userStr.length == 2) { - dbname = userStr[0]; - tablename = userStr[1]; - } else { - tablename = userStr[0]; - } - Properties p = UDFContext.getUDFContext() - .getUDFProperties(this.getClass(), new String[] {sign}); - Configuration config = job.getConfiguration(); - if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { - Schema schema = (Schema) ObjectSerializer.deserialize(p.getProperty(PIG_SCHEMA)); - if (schema != null) { - pigSchema = schema; - } - if (pigSchema == null) { - throw new FrontendException("Schema for data cannot be determined.", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - HCatSchema hcatTblSchema = new HCatSchema(new ArrayList()); - try { - doSchemaValidations(pigSchema, hcatTblSchema); - } catch (HCatException he) { - throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); - } - - List hcatFields = new ArrayList(); - List partVals = new ArrayList(); - for (String key : partitionKeys) { - hcatFields.add(new HCatFieldSchema(key, HCatFieldSchema.Type.STRING, "")); - partVals.add(partitions.get(key)); - } - - HCatSchema outputSchema = convertPigSchemaToHCatSchema(pigSchema, - hcatTblSchema); - LOG.debug("Pig Schema '" + pigSchema.toString() + "' was converted to HCatSchema '" - + outputSchema); - HCatEximOutputFormat.setOutput(job, - dbname, tablename, - outputLocation, - new HCatSchema(hcatFields), - partVals, - outputSchema); - p.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(outputSchema)); - p.setProperty(HCatConstants.HCAT_KEY_OUTPUT_INFO, - config.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - if (config.get(HCatConstants.HCAT_KEY_HIVE_CONF) != null) { - p.setProperty(HCatConstants.HCAT_KEY_HIVE_CONF, - config.get(HCatConstants.HCAT_KEY_HIVE_CONF)); - } - } else { - config.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, - p.getProperty(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - if (p.getProperty(HCatConstants.HCAT_KEY_HIVE_CONF) != null) { - config.set(HCatConstants.HCAT_KEY_HIVE_CONF, - p.getProperty(HCatConstants.HCAT_KEY_HIVE_CONF)); - } - } - } - - @Override - public void storeSchema(ResourceSchema schema, String arg1, Job job) throws IOException { - if( job.getConfiguration().get("mapred.job.tracker", "").equalsIgnoreCase("local") ) { - //In local mode, mapreduce will not call OutputCommitter.cleanupJob. - //Calling it from here so that the partition publish happens. - //This call needs to be removed after MAPREDUCE-1447 is fixed. 
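// (cleanupJob is what publishes the export metadata at the output location; the exim
// tests only read the _metadata file after the committer has run, so when the local
// runner skips the committer the storer invokes it directly.)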
- new HCatEximOutputCommitter(job,null).cleanupJob(job); - } - } -} Index: hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximLoader.java.broken =================================================================== --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximLoader.java.broken (revision 1673556) +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximLoader.java.broken (working copy) @@ -1,352 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.Iterator; -import java.util.Map; -import java.util.Properties; -import java.util.TreeMap; - -import junit.framework.TestCase; - -import org.apache.hcatalog.MiniCluster; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.backend.executionengine.ExecException; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.util.UDFContext; - -/** - * - * TestHCatEximLoader. 
Assumes Exim storer is working well - * - */ -public class TestHCatEximLoader extends TestCase { - - private static final String NONPART_TABLE = "junit_unparted"; - private static final String PARTITIONED_TABLE = "junit_parted"; - private static MiniCluster cluster = MiniCluster.buildCluster(); - - private static final String dataLocation = "/tmp/data"; - private static String fqdataLocation; - private static final String exportLocation = "/tmp/export"; - private static String fqexportLocation; - - private static Properties props; - - private void cleanup() throws IOException { - MiniCluster.deleteFile(cluster, dataLocation); - MiniCluster.deleteFile(cluster, exportLocation); - } - - @Override - protected void setUp() throws Exception { - props = new Properties(); - props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); - System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() - + ", fs.default.name : " + props.getProperty("fs.default.name")); - fqdataLocation = cluster.getProperties().getProperty("fs.default.name") + dataLocation; - fqexportLocation = cluster.getProperties().getProperty("fs.default.name") + exportLocation; - System.out.println("FQ Data Location :" + fqdataLocation); - System.out.println("FQ Export Location :" + fqexportLocation); - cleanup(); - } - - @Override - protected void tearDown() throws Exception { - cleanup(); - } - - private void populateDataFile() throws IOException { - MiniCluster.deleteFile(cluster, dataLocation); - String[] input = new String[] { - "237,Krishna,01/01/1990,M,IN,TN", - "238,Kalpana,01/01/2000,F,IN,KA", - "239,Satya,01/01/2001,M,US,TN", - "240,Kavya,01/01/2002,F,US,KA" - }; - MiniCluster.createInputFile(cluster, dataLocation, input); - } - - private static class EmpDetail { - String name; - String dob; - String mf; - String country; - String state; - } - - private void assertEmpDetail(Tuple t, Map eds) throws ExecException { - assertNotNull(t); - assertEquals(6, t.size()); - - assertTrue(t.get(0).getClass() == Integer.class); - assertTrue(t.get(1).getClass() == String.class); - assertTrue(t.get(2).getClass() == String.class); - assertTrue(t.get(3).getClass() == String.class); - assertTrue(t.get(4).getClass() == String.class); - assertTrue(t.get(5).getClass() == String.class); - - EmpDetail ed = eds.remove(t.get(0)); - assertNotNull(ed); - - assertEquals(ed.name, t.get(1)); - assertEquals(ed.dob, t.get(2)); - assertEquals(ed.mf, t.get(3)); - assertEquals(ed.country, t.get(4)); - assertEquals(ed.state, t.get(5)); - } - - private void addEmpDetail(Map empDetails, int id, String name, - String dob, String mf, String country, String state) { - EmpDetail ed = new EmpDetail(); - ed.name = name; - ed.dob = dob; - ed.mf = mf; - ed.country = country; - ed.state = state; - empDetails.put(id, ed); - } - - - - private void assertEmpDetail(Tuple t, Integer id, String name, String dob, String mf) - throws ExecException { - assertNotNull(t); - assertEquals(4, t.size()); - assertTrue(t.get(0).getClass() == Integer.class); - assertTrue(t.get(1).getClass() == String.class); - assertTrue(t.get(2).getClass() == String.class); - assertTrue(t.get(3).getClass() == String.class); - - assertEquals(id, t.get(0)); - assertEquals(name, t.get(1)); - assertEquals(dob, t.get(2)); - assertEquals(mf, t.get(3)); - } - - private void assertEmpDetail(Tuple t, String mf, String name) - throws ExecException { - assertNotNull(t); - assertEquals(2, t.size()); - assertTrue(t.get(0).getClass() == String.class); - 
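// (this two-column overload serves testLoadNonPartProjection below, whose script keeps
// only emp_sex and emp_name via "B = foreach A generate emp_sex, emp_name;")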
assertTrue(t.get(1).getClass() == String.class); - - assertEquals(mf, t.get(0)); - assertEquals(name, t.get(1)); - } - - - - public void testLoadNonPartTable() throws Exception { - populateDataFile(); - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation - + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); - server.registerQuery("store A into '" + NONPART_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - + fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader();"); - Iterator XIter = server.openIterator("A"); - assertTrue(XIter.hasNext()); - Tuple t = XIter.next(); - assertEmpDetail(t, 237, "Krishna", "01/01/1990", "M"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, 238, "Kalpana", "01/01/2000", "F"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, 239, "Satya", "01/01/2001", "M"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, 240, "Kavya", "01/01/2002", "F"); - assertFalse(XIter.hasNext()); - } - } - - public void testLoadNonPartProjection() throws Exception { - populateDataFile(); - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation - + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); - server.registerQuery("store A into '" + NONPART_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - + fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader();"); - server.registerQuery("B = foreach A generate emp_sex, emp_name;"); - - Iterator XIter = server.openIterator("B"); - assertTrue(XIter.hasNext()); - Tuple t = XIter.next(); - assertEmpDetail(t, "M", "Krishna"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, "F", "Kalpana"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, "M", "Satya"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, "F", "Kavya"); - assertFalse(XIter.hasNext()); - } - } - - - public void testLoadMultiPartTable() throws Exception { - { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation + - "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);" - ); - server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';"); - server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';"); - server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';"); - server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';"); - 
server.registerQuery("store INTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=tn');"); - server.registerQuery("store INKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=ka');"); - server.registerQuery("store USTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=tn');"); - server.registerQuery("store USKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=ka');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - + fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader() " - //+ "as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);"); - + ";"); - - Iterator XIter = server.openIterator("A"); - - Map empDetails = new TreeMap(); - addEmpDetail(empDetails, 237, "Krishna", "01/01/1990", "M", "in", "tn"); - addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka"); - addEmpDetail(empDetails, 239, "Satya", "01/01/2001", "M", "us", "tn"); - addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka"); - - while(XIter.hasNext()) { - Tuple t = XIter.next(); - assertNotSame(0, empDetails.size()); - assertEmpDetail(t, empDetails); - } - assertEquals(0, empDetails.size()); - } - } - - public void testLoadMultiPartFilter() throws Exception { - { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation + - "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);" - ); - server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';"); - server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';"); - server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';"); - server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';"); - server.registerQuery("store INTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=tn');"); - server.registerQuery("store INKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=ka');"); - server.registerQuery("store USTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=tn');"); - server.registerQuery("store USKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=ka');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - + fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader() " - + ";"); - server.registerQuery("B = filter A by emp_state == 'ka';"); - - Iterator XIter = 
server.openIterator("B"); - - Map empDetails = new TreeMap(); - addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka"); - addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka"); - - while(XIter.hasNext()) { - Tuple t = XIter.next(); - assertNotSame(0, empDetails.size()); - assertEmpDetail(t, empDetails); - } - assertEquals(0, empDetails.size()); - } - } - - -} Index: hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java =================================================================== --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java (revision 1673556) +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java (working copy) @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.WindowsPathUtil; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.StorageFormats; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; @@ -51,6 +52,7 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.Shell; import org.apache.hive.hcatalog.HcatTestUtils; import org.apache.hive.hcatalog.common.HCatUtil; import org.apache.hive.hcatalog.common.HCatConstants; @@ -179,6 +181,11 @@ hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + + if (Shell.WINDOWS) { + WindowsPathUtil.convertPathsFromWindowsToHdfs(hiveConf); + } + driver = new Driver(hiveConf); SessionState.start(new CliSessionState(hiveConf)); Index: hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java =================================================================== --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java (revision 1673556) +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java (working copy) @@ -33,11 +33,13 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.WindowsPathUtil; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.StorageFormats; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.util.Shell; import org.apache.pig.ExecType; import org.apache.pig.PigServer; import org.apache.pig.backend.executionengine.ExecException; @@ -123,6 +125,11 @@ hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + + if (Shell.WINDOWS) { + WindowsPathUtil.convertPathsFromWindowsToHdfs(hiveConf); + } + driver = new Driver(hiveConf); SessionState.start(new CliSessionState(hiveConf)); //props = new Properties(); Index: hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java =================================================================== --- 
hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java (revision 0) +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java (working copy) @@ -0,0 +1,425 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.pig; + +import java.io.File; +import java.io.IOException; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.io.FileUtils; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.WindowsPathUtil; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.StorageFormats; +import org.apache.hadoop.hive.ql.processors.CommandProcessor; +import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.processors.HiveCommand; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.shims.HadoopShims; +import org.apache.hadoop.hive.shims.ShimLoader; + +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.util.Shell; +import org.apache.hive.hcatalog.HcatTestUtils; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.Pair; + +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.data.Tuple; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.Assert.*; +import static org.junit.Assume.assumeTrue; + +@RunWith(Parameterized.class) +public class TestHCatLoaderEncryption { + private static 
final Logger LOG = LoggerFactory.getLogger(TestHCatLoader.class); + private static final String TEST_DATA_DIR = HCatUtil.makePathASafeFileName(System.getProperty + ("java.io.tmpdir") + File.separator + TestHCatLoader.class.getCanonicalName() + "-" + + System.currentTimeMillis()); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String BASIC_FILE_NAME = TEST_DATA_DIR + "/basic.input.data"; + private static final String BASIC_TABLE = "junit_unparted_basic"; + private static final String ENCRYPTED_TABLE = "encrypted_table"; + private static final String SECURITY_KEY_PROVIDER_URI_NAME = "dfs.encryption.key.provider.uri"; + + private HadoopShims.MiniDFSShim dfs = null; + private HadoopShims.HdfsEncryptionShim hes = null; + private final String[] testOnlyCommands = new String[]{"crypto"}; + private final String[] encryptionUnsupportedHadoopVersion = new String[]{ShimLoader + .HADOOP20SVERSIONNAME}; + private boolean isEncryptionTestEnabled = true; + private Driver driver; + private Map> basicInputData; + private static List readRecords = new ArrayList(); + + private static final Map> DISABLED_STORAGE_FORMATS = + new HashMap>() {{ + put(IOConstants.PARQUETFILE, new HashSet() {{ + add("testReadDataBasic"); + add("testReadPartitionedBasic"); + add("testProjectionsBasic"); + add("testReadDataFromEncryptedHiveTable"); + }}); + }}; + + private String storageFormat; + + @Parameterized.Parameters + public static Collection generateParameters() { + return StorageFormats.names(); + } + + public TestHCatLoaderEncryption(String storageFormat) { + this.storageFormat = storageFormat; + } + + private void dropTable(String tablename) throws IOException, CommandNeedRetryException { + dropTable(tablename, driver); + } + + static void dropTable(String tablename, Driver driver) throws IOException, CommandNeedRetryException { + driver.run("drop table if exists " + tablename); + } + + private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + createTable(tablename, schema, partitionedBy, driver, storageFormat); + } + + static void createTable(String tablename, String schema, String partitionedBy, Driver driver, String storageFormat) + throws IOException, CommandNeedRetryException { + String createTable; + createTable = "create table " + tablename + "(" + schema + ") "; + if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { + createTable = createTable + "partitioned by (" + partitionedBy + ") "; + } + createTable = createTable + "stored as " +storageFormat; + executeStatementOnDriver(createTable, driver); + } + + private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + createTable(tablename, schema, null); + } + + /** + * Execute Hive CLI statement + * @param cmd arbitrary statement to execute + */ + static void executeStatementOnDriver(String cmd, Driver driver) throws IOException, CommandNeedRetryException { + LOG.debug("Executing: " + cmd); + CommandProcessorResponse cpr = driver.run(cmd); + if(cpr.getResponseCode() != 0) { + throw new IOException("Failed to execute \"" + cmd + "\". 
Driver returned " + cpr.getResponseCode() + " Error: " + cpr.getErrorMessage()); + } + } + + @Before + public void setup() throws Exception { + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); + } + if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { + throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR); + } + + HiveConf hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + + if (Shell.WINDOWS) { + WindowsPathUtil.convertPathsFromWindowsToHdfs(hiveConf); + } + + driver = new Driver(hiveConf); + + checkShimLoaderVersion(); + initEncryptionShim(hiveConf); + String encryptedTablePath = TEST_WAREHOUSE_DIR + "/encryptedTable"; + SessionState.start(new CliSessionState(hiveConf)); + + SessionState.get().out = System.out; + + createTable(BASIC_TABLE, "a int, b string"); + createTableInSpecifiedPath(ENCRYPTED_TABLE, "a int, b string", encryptedTablePath, driver); + + associateEncryptionZoneWithPath(encryptedTablePath); + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + basicInputData = new HashMap>(); + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + String sj = "S" + j + "S"; + input[k] = si + "\t" + sj; + basicInputData.put(k, new Pair(i, sj)); + k++; + } + } + HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + int i = 0; + server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);", ++i); + server.registerQuery("store A into '" + ENCRYPTED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i); + server.executeBatch(); + } + + void checkShimLoaderVersion() { + for (String v : encryptionUnsupportedHadoopVersion) { + if (ShimLoader.getMajorVersion().equals(v)) { + isEncryptionTestEnabled = false; + return; + } + } + } + + void initEncryptionShim(HiveConf conf) throws IOException { + if (!isEncryptionTestEnabled) { + return; + } + FileSystem fs; + HadoopShims shims = ShimLoader.getHadoopShims(); + conf.set(SECURITY_KEY_PROVIDER_URI_NAME, getKeyProviderURI()); + + int numberOfDataNodes = 4; + dfs = shims.getMiniDfs(conf, numberOfDataNodes, true, null); + fs = dfs.getFileSystem(); + + // set up a java key provider for encrypted hdfs cluster + hes = shims.createHdfsEncryptionShim(fs, conf); + } + + public static String ensurePathEndsInSlash(String path) { + if (path == null) { + throw new NullPointerException("Path cannot be null"); + } + if (path.endsWith(File.separator)) { + return path; + } else { + return path + File.separator; + } + } + + private void associateEncryptionZoneWithPath(String path) throws SQLException, CommandNeedRetryException { + if (!isEncryptionTestEnabled) { + return; + } + LOG.info(this.storageFormat + ": associateEncryptionZoneWithPath"); + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); + enableTestOnlyCmd(SessionState.get().getConf()); + CommandProcessor crypto = getTestCommand("crypto"); + if (crypto == null) return; + checkExecutionResponse(crypto.run("CREATE_KEY --keyName key_128 --bitLength 128")); + checkExecutionResponse(crypto.run("CREATE_ZONE --keyName key_128 --path " + path)); + } + + private void 
checkExecutionResponse(CommandProcessorResponse response) { + int rc = response.getResponseCode(); + if (rc != 0) { + SessionState.get().out.println(response); + } + assertEquals("Crypto command failed with the exit code" + rc, 0, rc); + } + + private void removeEncryptionZone() throws SQLException, CommandNeedRetryException { + if (!isEncryptionTestEnabled) { + return; + } + LOG.info(this.storageFormat + ": removeEncryptionZone"); + enableTestOnlyCmd(SessionState.get().getConf()); + CommandProcessor crypto = getTestCommand("crypto"); + if (crypto == null) { + return; + } + checkExecutionResponse(crypto.run("DELETE_KEY --keyName key_128")); + } + + private CommandProcessor getTestCommand(final String commandName) throws SQLException { + HiveCommand testCommand = HiveCommand.find(new String[]{commandName}, HiveCommand.ONLY_FOR_TESTING); + + if (testCommand == null) { + return null; + } + + return CommandProcessorFactory + .getForHiveCommandInternal(new String[]{commandName}, SessionState.get().getConf(), + testCommand.isOnlyForTesting()); + } + + private void enableTestOnlyCmd(HiveConf conf){ + StringBuilder securityCMDs = new StringBuilder(conf.getVar(HiveConf.ConfVars.HIVE_SECURITY_COMMAND_WHITELIST)); + for(String c : testOnlyCommands){ + securityCMDs.append(","); + securityCMDs.append(c); + } + conf.set(HiveConf.ConfVars.HIVE_SECURITY_COMMAND_WHITELIST.toString(), securityCMDs.toString()); + } + + private String getKeyProviderURI() { + // Use the target directory if it is not specified + String HIVE_ROOT = ensurePathEndsInSlash(System.getProperty("hive.root")); + String keyDir = HIVE_ROOT + "ql/target/"; + + // put the jks file in the current test path only for test purpose + return "jceks://file" + new Path(keyDir, "test.jks").toUri(); + } + + @Test + public void testReadDataFromEncryptedHiveTableByPig() throws IOException { + assumeTrue(isEncryptionTestEnabled); + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); + PigServer server = new PigServer(ExecType.LOCAL); + + server.registerQuery("X = load '" + ENCRYPTED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator XIter = server.openIterator("X"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(2, t.size()); + assertNotNull(t.get(0)); + assertNotNull(t.get(1)); + assertTrue(t.get(0).getClass() == Integer.class); + assertTrue(t.get(1).getClass() == String.class); + assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); + assertEquals(t.get(1), basicInputData.get(numTuplesRead).second); + numTuplesRead++; + } + assertEquals("failed with storage format: " + this.storageFormat, basicInputData.size(), numTuplesRead); + } + + @Test + public void testReadDataFromEncryptedHiveTableByHCatMR() throws Exception { + assumeTrue(isEncryptionTestEnabled); + assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS)); + + readRecords.clear(); + Configuration conf = new Configuration(); + Job job = new Job(conf, "hcat mapreduce read encryption test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(TestHCatLoaderEncryption.MapRead.class); + + // input/output settings + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + + HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, ENCRYPTED_TABLE, null); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + + job.setNumReduceTasks(0); + + FileSystem fs = new LocalFileSystem(); + 
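+    // Scratch output directory for TextOutputFormat; any leftover copy is deleted first,
+    // since the MR job would otherwise fail on an already-existing output path.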
Path path = new Path(TEST_DATA_DIR + "/testHCatMREncryptionOutput"); + if (fs.exists(path)) { + fs.delete(path, true); + } + + TextOutputFormat.setOutputPath(job, path); + + job.waitForCompletion(true); + + int numTuplesRead = 0; + for (HCatRecord hCatRecord : readRecords) { + assertEquals(2, hCatRecord.size()); + assertNotNull(hCatRecord.get(0)); + assertNotNull(hCatRecord.get(1)); + assertTrue(hCatRecord.get(0).getClass() == Integer.class); + assertTrue(hCatRecord.get(1).getClass() == String.class); + assertEquals(hCatRecord.get(0), basicInputData.get(numTuplesRead).first); + assertEquals(hCatRecord.get(1), basicInputData.get(numTuplesRead).second); + numTuplesRead++; + } + assertEquals("failed HCat MR read with storage format: " + this.storageFormat, + basicInputData.size(), numTuplesRead); + } + + public static class MapRead extends Mapper { + + @Override + public void map(WritableComparable key, HCatRecord value, Context context) + throws IOException, InterruptedException { + try { + readRecords.add(value); + } catch (Exception e) { + LOG.error("error when read record.", e); + throw new IOException(e); + } + } + } + + @After + public void tearDown() throws Exception { + try { + if (driver != null) { + dropTable(BASIC_TABLE); + dropTable(ENCRYPTED_TABLE); + removeEncryptionZone(); + } + } finally { + FileUtils.deleteDirectory(new File(TEST_DATA_DIR)); + } + } + + static void createTableInSpecifiedPath(String tableName, String schema, String path, Driver driver) throws IOException, CommandNeedRetryException { + String createTableStr; + createTableStr = "create table " + tableName + "(" + schema + ") location \'" + path + "\'"; + executeStatementOnDriver(createTableStr, driver); + } +} Index: hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPermsInheritance.java.broken =================================================================== --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPermsInheritance.java.broken (revision 1673556) +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPermsInheritance.java.broken (working copy) @@ -1,135 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - --->There are two pieces of code that sets directory permissions. --->One that sets the UMask which only woks for dfs filesystem. --->And the other change the permission of directories after they are created. --->I removed that since it is not secure and just add more load on the namenode. --->We should push this test to e2e to verify what actually runs in production. 
- -package org.apache.hcatalog.pig; - -import java.io.IOException; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.UnknownTableException; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hcatalog.ExitException; -import org.apache.hcatalog.NoExitSecurityManager; -import org.apache.hcatalog.cli.HCatCli; -import org.apache.hcatalog.pig.HCatStorer; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.impl.util.UDFContext; -import org.apache.thrift.TException; - -public class TestPermsInheritance extends TestCase { - - @Override - protected void setUp() throws Exception { - super.setUp(); - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - msc = new HiveMetaStoreClient(conf); - msc.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,"testNoPartTbl", true,true); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - msc.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,"testPartTbl", true,true); - pig = new PigServer(ExecType.LOCAL, conf.getAllProperties()); - UDFContext.getUDFContext().setClientSystemProps(); - } - - private HiveMetaStoreClient msc; - private SecurityManager securityManager; - private PigServer pig; - - @Override - protected void tearDown() throws Exception { - super.tearDown(); - System.setSecurityManager(securityManager); - } - - private final HiveConf conf = new HiveConf(this.getClass()); - - public void testNoPartTbl() throws IOException, MetaException, UnknownTableException, TException, NoSuchObjectException, HiveException{ - - try{ - HCatCli.main(new String[]{"-e","create table testNoPartTbl (line string) stored as RCFILE", "-p","rwx-wx---"}); - } - catch(Exception e){ - assertTrue(e instanceof ExitException); - assertEquals(((ExitException)e).getStatus(), 0); - } - Warehouse wh = new Warehouse(conf); - Path dfsPath = wh.getTablePath(Hive.get(conf).getDatabase(MetaStoreUtils.DEFAULT_DATABASE_NAME), "testNoPartTbl"); - FileSystem fs = dfsPath.getFileSystem(conf); - assertEquals(fs.getFileStatus(dfsPath).getPermission(),FsPermission.valueOf("drwx-wx---")); - - pig.setBatchOn(); - pig.registerQuery("A = load 'build.xml' as (line:chararray);"); - pig.registerQuery("store A into 'testNoPartTbl' using "+HCatStorer.class.getName()+"();"); - pig.executeBatch(); - FileStatus[] status = fs.listStatus(dfsPath,hiddenFileFilter); - - assertEquals(status.length, 1); - assertEquals(FsPermission.valueOf("drwx-wx---"),status[0].getPermission()); - - try{ - HCatCli.main(new String[]{"-e","create table testPartTbl (line string) partitioned by (a string) stored as RCFILE", "-p","rwx-wx--x"}); - } - catch(Exception e){ - assertTrue(e instanceof ExitException); - assertEquals(((ExitException)e).getStatus(), 0); - } - - dfsPath = 
wh.getTablePath(Hive.get(conf).getDatabase(MetaStoreUtils.DEFAULT_DATABASE_NAME), "testPartTbl"); - assertEquals(fs.getFileStatus(dfsPath).getPermission(),FsPermission.valueOf("drwx-wx--x")); - - pig.setBatchOn(); - pig.registerQuery("A = load 'build.xml' as (line:chararray);"); - pig.registerQuery("store A into 'testPartTbl' using "+HCatStorer.class.getName()+"('a=part');"); - pig.executeBatch(); - - Path partPath = new Path(dfsPath,"a=part"); - assertEquals(FsPermission.valueOf("drwx-wx--x"),fs.getFileStatus(partPath).getPermission()); - status = fs.listStatus(partPath,hiddenFileFilter); - assertEquals(status.length, 1); - assertEquals(FsPermission.valueOf("drwx-wx--x"),status[0].getPermission()); - } - - private static final PathFilter hiddenFileFilter = new PathFilter(){ - public boolean accept(Path p){ - String name = p.getName(); - return !name.startsWith("_") && !name.startsWith("."); - } - }; -} Index: hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigStorageDriver.java.broken =================================================================== --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigStorageDriver.java.broken (revision 1673556) +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigStorageDriver.java.broken (working copy) @@ -1,272 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.pig; - -import java.io.BufferedInputStream; -import java.io.DataInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.Iterator; -import java.util.Map; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.UnknownTableException; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.pig.HCatLoader; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.util.UDFContext; -import org.apache.thrift.TException; - -public class TestPigStorageDriver extends TestCase { - - private HiveConf hcatConf; - private Driver hcatDriver; - private HiveMetaStoreClient msc; - private static String tblLocation = "/tmp/test_pig/data"; - private static String anyExistingFileInCurDir = "ivy.xml"; - private static String warehouseDir = "/tmp/hcat_junit_warehouse"; - - @Override - protected void setUp() throws Exception { - - hcatConf = new HiveConf(this.getClass()); - hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatDriver = new Driver(hcatConf); - msc = new HiveMetaStoreClient(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - super.setUp(); - } - - @Override - protected void tearDown() throws Exception { - super.tearDown(); - } - - public void testPigStorageDriver() throws IOException, CommandNeedRetryException{ - - String fsLoc = hcatConf.get("fs.default.name"); - Path tblPath = new Path(fsLoc, tblLocation); - String tblName = "junit_pigstorage"; - tblPath.getFileSystem(hcatConf).copyFromLocalFile(new Path(anyExistingFileInCurDir),tblPath); - - hcatDriver.run("drop table " + tblName); - CommandProcessorResponse resp; - String createTable = "create table " + tblName + " (a string) partitioned by (b string) stored as TEXTFILE"; - - resp = hcatDriver.run(createTable); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - resp = hcatDriver.run("alter table " + tblName + " add partition (b='2010-10-10') location '"+new Path(fsLoc, "/tmp/test_pig")+"'"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - resp = 
hcatDriver.run("alter table " + tblName + " partition (b='2010-10-10') set fileformat TEXTFILE"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - resp = hcatDriver.run("desc extended " + tblName + " partition (b='2010-10-10')"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties()); - UDFContext.getUDFContext().setClientSystemProps(); - server.registerQuery(" a = load '" + tblName + "' using "+HCatLoader.class.getName()+";"); - Iterator itr = server.openIterator("a"); - boolean result = compareWithFile(itr, anyExistingFileInCurDir, 2, "2010-10-10", null); - assertTrue(result); - - server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);"); - server.store("a", tblName, HCatStorer.class.getName() + "('b=2010-10-11')"); - - server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-10-11' using PigStorage() as (a:chararray);"); - itr = server.openIterator("a"); - result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-10-11", null); - assertTrue(result); - - // Test multi-store - server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);"); - server.registerQuery("store a into '" + tblName + "' using " + HCatStorer.class.getName() + "('b=2010-11-01');"); - server.registerQuery("store a into '" + tblName + "' using " + HCatStorer.class.getName() + "('b=2010-11-02');"); - - server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-11-01' using PigStorage() as (a:chararray);"); - itr = server.openIterator("a"); - result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-11-01", null); - assertTrue(result); - - server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-11-02' using PigStorage() as (a:chararray);"); - itr = server.openIterator("a"); - result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-11-02", null); - assertTrue(result); - - hcatDriver.run("drop table " + tblName); - } - - private boolean compareWithFile(Iterator itr, String factFile, int numColumn, String key, String valueSuffix) throws IOException { - DataInputStream stream = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(factFile)))); - while(itr.hasNext()){ - Tuple t = itr.next(); - assertEquals(numColumn, t.size()); - if(t.get(0) != null) { - // If underlying data-field is empty. PigStorage inserts null instead - // of empty String objects. 
- assertTrue(t.get(0) instanceof String); - String expected = stream.readLine(); - if (valueSuffix!=null) - expected += valueSuffix; - assertEquals(expected, t.get(0)); - } - else{ - assertTrue(stream.readLine().isEmpty()); - } - - if (numColumn>1) { - // The second column must be key - assertTrue(t.get(1) instanceof String); - assertEquals(key, t.get(1)); - } - } - assertEquals(0,stream.available()); - stream.close(); - return true; - } - - public void testDelim() throws MetaException, TException, UnknownTableException, NoSuchObjectException, InvalidOperationException, IOException, CommandNeedRetryException{ - - hcatDriver.run("drop table junit_pigstorage_delim"); - - CommandProcessorResponse resp; - String createTable = "create table junit_pigstorage_delim (a0 string, a1 string) partitioned by (b string) stored as RCFILE"; - - resp = hcatDriver.run(createTable); - - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - resp = hcatDriver.run("alter table junit_pigstorage_delim add partition (b='2010-10-10')"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - resp = hcatDriver.run("alter table junit_pigstorage_delim partition (b='2010-10-10') set fileformat TEXTFILE"); - - Partition part = msc.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, "junit_pigstorage_delim", "b=2010-10-10"); - Map partParms = part.getParameters(); - partParms.put(HCatConstants.HCAT_PIG_LOADER_ARGS, "control-A"); - partParms.put(HCatConstants.HCAT_PIG_STORER_ARGS, "control-A"); - - msc.alter_partition(MetaStoreUtils.DEFAULT_DATABASE_NAME, "junit_pigstorage_delim", part); - - PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties()); - UDFContext.getUDFContext().setClientSystemProps(); - server.registerQuery(" a = load 'junit_pigstorage_delim' using "+HCatLoader.class.getName()+";"); - try{ - server.openIterator("a"); - }catch(FrontendException fe){} - - resp = hcatDriver.run("alter table junit_pigstorage_delim set fileformat TEXTFILE"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - resp = hcatDriver.run("alter table junit_pigstorage_delim set TBLPROPERTIES ('hcat.pig.loader.args'=':', 'hcat.pig.storer.args'=':')"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - File inputFile = File.createTempFile("hcat_test", ""); - PrintWriter p = new PrintWriter(new FileWriter(inputFile)); - p.println("1\t2"); - p.println("3\t4"); - p.close(); - server.registerQuery("a = load '"+inputFile.toString()+"' using PigStorage() as (a0:chararray, a1:chararray);"); - server.store("a", "junit_pigstorage_delim", HCatStorer.class.getName() + "('b=2010-10-11')"); - - server.registerQuery("a = load '/tmp/hcat_junit_warehouse/junit_pigstorage_delim/b=2010-10-11' using PigStorage() as (a:chararray);"); - Iterator itr = server.openIterator("a"); - - assertTrue(itr.hasNext()); - Tuple t = itr.next(); - assertTrue(t.get(0).equals("1:2")); - - assertTrue(itr.hasNext()); - t = itr.next(); - assertTrue(t.get(0).equals("3:4")); - - assertFalse(itr.hasNext()); - inputFile.delete(); - } - - public void testMultiConstructArgs() throws MetaException, TException, UnknownTableException, NoSuchObjectException, InvalidOperationException, IOException, CommandNeedRetryException{ - - String fsLoc = hcatConf.get("fs.default.name"); - Path tblPath = new Path(fsLoc, tblLocation); - String tblName = "junit_pigstorage_constructs"; - tblPath.getFileSystem(hcatConf).copyFromLocalFile(new 
Path(anyExistingFileInCurDir),tblPath); - - hcatDriver.run("drop table junit_pigstorage_constructs"); - - CommandProcessorResponse resp; - String createTable = "create table " + tblName + " (a string) partitioned by (b string) stored as TEXTFILE"; - - resp = hcatDriver.run(createTable); - - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - resp = hcatDriver.run("alter table " + tblName + " set TBLPROPERTIES ('hcat.pig.storer'='org.apache.hcatalog.pig.MyPigStorage', 'hcat.pig.storer.args'=':#hello', 'hcat.pig.args.delimiter'='#')"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties()); - UDFContext.getUDFContext().setClientSystemProps(); - - server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);"); - server.store("a", tblName, HCatStorer.class.getName() + "('b=2010-10-11')"); - - server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-10-11' using PigStorage() as (a:chararray);"); - Iterator itr = server.openIterator("a"); - boolean result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-10-11", ":hello"); - assertTrue(result); - } -} Index: hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java =================================================================== --- hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java (revision 1673556) +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java (working copy) @@ -29,7 +29,6 @@ import org.apache.hadoop.hive.metastore.api.NotificationEvent; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.events.AddPartitionEvent; import org.apache.hadoop.hive.metastore.events.AlterPartitionEvent; import org.apache.hadoop.hive.metastore.events.AlterTableEvent; @@ -44,7 +43,6 @@ import org.apache.hive.hcatalog.common.HCatConstants; import org.apache.hive.hcatalog.messaging.MessageFactory; -import java.util.Map; import java.util.concurrent.TimeUnit; /** @@ -146,11 +144,9 @@ NotificationEvent event = new NotificationEvent(0, now(), HCatConstants.HCAT_ALTER_TABLE_EVENT, msgFactory.buildAlterTableMessage(before, after).toString()); - if (event != null) { - event.setDbName(after.getDbName()); - event.setTableName(after.getTableName()); - enqueue(event); - } + event.setDbName(after.getDbName()); + event.setTableName(after.getTableName()); + enqueue(event); } /** @@ -162,7 +158,7 @@ Table t = partitionEvent.getTable(); NotificationEvent event = new NotificationEvent(0, now(), HCatConstants.HCAT_ADD_PARTITION_EVENT, - msgFactory.buildAddPartitionMessage(t, partitionEvent.getPartitions()).toString()); + msgFactory.buildAddPartitionMessage(t, partitionEvent.getPartitionIterator()).toString()); event.setDbName(t.getDbName()); event.setTableName(t.getTableName()); enqueue(event); @@ -192,11 +188,9 @@ NotificationEvent event = new NotificationEvent(0, now(), HCatConstants.HCAT_ALTER_PARTITION_EVENT, msgFactory.buildAlterPartitionMessage(before, after).toString()); - if (event != null) { - event.setDbName(before.getDbName()); - event.setTableName(before.getTableName()); - enqueue(event); - } + event.setDbName(before.getDbName()); + 
event.setTableName(before.getTableName()); + enqueue(event); } /** Index: hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/NotificationListener.java =================================================================== --- hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/NotificationListener.java (revision 1673556) +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/NotificationListener.java (working copy) @@ -21,8 +21,6 @@ import java.util.ArrayList; import java.util.HashMap; -import java.util.List; -import java.util.Map; import javax.jms.Connection; import javax.jms.ConnectionFactory; @@ -130,15 +128,14 @@ // and message selector string as "HCAT_EVENT = HCAT_ADD_PARTITION" if (partitionEvent.getStatus()) { Table table = partitionEvent.getTable(); - List partitions = partitionEvent.getPartitions(); String topicName = getTopicName(table); if (topicName != null && !topicName.equals("")) { - send(messageFactory.buildAddPartitionMessage(table, partitions), topicName); + send(messageFactory.buildAddPartitionMessage(table, partitionEvent.getPartitionIterator()), topicName); } else { LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " - + partitions.get(0).getDbName() + + partitionEvent.getTable().getDbName() + "." - + partitions.get(0).getTableName() + + partitionEvent.getTable().getTableName() + " To enable notifications for this table, please do alter table set properties (" + HCatConstants.HCAT_MSGBUS_TOPIC_NAME + "=.) or whatever you want topic name to be."); Index: hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageFactory.java =================================================================== --- hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageFactory.java (revision 1673556) +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageFactory.java (working copy) @@ -20,16 +20,14 @@ package org.apache.hive.hcatalog.messaging; import org.apache.hadoop.hive.common.JavaUtils; -import org.apache.hadoop.hive.common.classification.InterfaceAudience; -import org.apache.hadoop.hive.common.classification.InterfaceStability; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hive.hcatalog.messaging.json.JSONMessageFactory; +import java.util.Iterator; import java.util.List; /** @@ -140,22 +138,12 @@ /** * Factory method for AddPartitionMessage. * @param table The Table to which the partitions are added. - * @param partitions The set of Partitions being added. + * @param partitions The iterator to set of Partitions being added. * @return AddPartitionMessage instance. */ - public abstract AddPartitionMessage buildAddPartitionMessage(Table table, List partitions); + public abstract AddPartitionMessage buildAddPartitionMessage(Table table, Iterator partitions); /** - * Factory method for AddPartitionMessage. - * @param table The Table to which the partitions are added. - * @param partitionSpec The set of Partitions being added. - * @return AddPartitionMessage instance. 
- */ - @InterfaceAudience.LimitedPrivate({"Hive"}) - @InterfaceStability.Evolving - public abstract AddPartitionMessage buildAddPartitionMessage(Table table, PartitionSpecProxy partitionSpec); - - /** * Factory method for building AlterPartitionMessage * @param before The partition before it was altered * @param after The partition after it was altered Index: hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageFactory.java =================================================================== --- hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageFactory.java (revision 1673556) +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageFactory.java (working copy) @@ -19,14 +19,14 @@ package org.apache.hive.hcatalog.messaging.json; +import com.google.common.base.Function; +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.classification.InterfaceAudience; -import org.apache.hadoop.hive.common.classification.InterfaceStability; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hive.hcatalog.messaging.AddPartitionMessage; import org.apache.hive.hcatalog.messaging.AlterPartitionMessage; import org.apache.hive.hcatalog.messaging.AlterTableMessage; @@ -39,7 +39,12 @@ import org.apache.hive.hcatalog.messaging.MessageDeserializer; import org.apache.hive.hcatalog.messaging.MessageFactory; -import java.util.*; +import javax.annotation.Nullable; +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; /** * The JSON implementation of the MessageFactory. 
Constructs JSON implementations of @@ -98,20 +103,12 @@ } @Override - public AddPartitionMessage buildAddPartitionMessage(Table table, List partitions) { + public AddPartitionMessage buildAddPartitionMessage(Table table, Iterator partitionsIterator) { return new JSONAddPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), - table.getTableName(), getPartitionKeyValues(table, partitions), now()); + table.getTableName(), getPartitionKeyValues(table, partitionsIterator), now()); } @Override - @InterfaceAudience.LimitedPrivate({"Hive"}) - @InterfaceStability.Evolving - public AddPartitionMessage buildAddPartitionMessage(Table table, PartitionSpecProxy partitionSpec) { - return new JSONAddPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), - table.getTableName(), getPartitionKeyValues(table, partitionSpec), now()); - } - - @Override public AlterPartitionMessage buildAlterPartitionMessage(Partition before, Partition after) { return new JSONAlterPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, before.getDbName(), before.getTableName(), before.getValues(), now()); @@ -142,22 +139,12 @@ return partitionKeys; } - private static List> getPartitionKeyValues(Table table, List partitions) { - List> partitionList = new ArrayList>(partitions.size()); - for (Partition partition : partitions) - partitionList.add(getPartitionKeyValues(table, partition)); - return partitionList; + private static List> getPartitionKeyValues(final Table table, Iterator iterator) { + return Lists.newArrayList(Iterators.transform(iterator, new Function>() { + @Override + public Map apply(@Nullable Partition partition) { + return getPartitionKeyValues(table, partition); + } + })); } - - @InterfaceAudience.LimitedPrivate({"Hive"}) - @InterfaceStability.Evolving - private static List> getPartitionKeyValues(Table table, PartitionSpecProxy partitionSpec) { - List> partitionList = new ArrayList>(); - PartitionSpecProxy.PartitionIterator iterator = partitionSpec.getPartitionIterator(); - while (iterator.hasNext()) { - Partition partition = iterator.next(); - partitionList.add(getPartitionKeyValues(table, partition)); - } - return partitionList; - } } Index: hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml =================================================================== --- hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml (revision 1673556) +++ hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml (working copy) @@ -35,7 +35,7 @@ templeton.libjars - ${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.5.jar + ${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar Jars to add to the classpath. @@ -69,7 +69,12 @@ shipped to the target node in the cluster to execute Pig job which uses HCat, Hive query, etc. + + templeton.hive.extra.files + ${env.TEZ_CLIENT_HOME}/conf/tez-site.xml,${env.TEZ_CLIENT_HOME}/,${env.TEZ_CLIENT_HOME}/lib + + templeton.hcat.home apache-hive-${env.HIVE_VERSION}-bin.tar.gz/apache-hive-${env.HIVE_VERSION}-bin/hcatalog The path to the HCat home within the tar. 
This is needed if @@ -101,7 +106,7 @@ - + templeton.hive.properties hive.metastore.uris=thrift://localhost:9933,hive.metastore.sasl.enabled=false Index: hcatalog/src/test/e2e/templeton/deployers/env.sh =================================================================== --- hcatalog/src/test/e2e/templeton/deployers/env.sh (revision 1673556) +++ hcatalog/src/test/e2e/templeton/deployers/env.sh (working copy) @@ -36,6 +36,10 @@ export PIG_VERSION=0.12.2-SNAPSHOT fi +if [ -z ${TEZ_VERSION} ]; then + export TEZ_VERSION=0.5.3 +fi + #Root of project source tree if [ -z ${PROJ_HOME} ]; then export PROJ_HOME=/Users/${USER}/dev/hive @@ -46,6 +50,7 @@ export HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION} fi +export TEZ_CLIENT_HOME=/Users/ekoifman/dev/apache-tez-client-${TEZ_VERSION} #Make sure Pig is built for the Hadoop version you are running export PIG_TAR_PATH=/Users/${USER}/dev/pig-${PIG_VERSION}-src/build #this is part of Pig distribution Index: hcatalog/webhcat/java-client/pom.xml =================================================================== --- hcatalog/webhcat/java-client/pom.xml (revision 1673556) +++ hcatalog/webhcat/java-client/pom.xml (working copy) @@ -46,6 +46,11 @@ ${project.version} + org.apache.hive.hcatalog + hive-hcatalog-server-extensions + ${project.version} + + org.apache.hive hive-exec ${project.version} Index: hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java =================================================================== --- hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java (revision 1673556) +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java (working copy) @@ -18,6 +18,7 @@ */ package org.apache.hive.hcatalog.api; +import java.util.Iterator; import java.util.List; import java.util.Map; @@ -27,6 +28,7 @@ import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.PartitionEventType; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hive.hcatalog.api.repl.ReplicationTask; import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; @@ -379,6 +381,24 @@ throws HCatException; /** + * Drops partition(s) that match the specified (and possibly partial) partition specification. + * A partial partition-specification is one where not all partition-keys have associated values. For example, + * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), + * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, + * 1. Complete partition spec: dropPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would drop 1 partition. + * 2. Partial partition spec: dropPartitions('myDb', 'myTable', {dt='20120101'}) would drop all 3 partitions, + * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). + * @param dbName The database name. + * @param tableName The table name. + * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. + * @param ifExists Hive returns an error if the partition specified does not exist, unless ifExists is set to true. + * @param deleteData Whether to delete the underlying data. 
+ * @throws HCatException,ConnectionFailureException + */ + public abstract void dropPartitions(String dbName, String tableName, + Map partitionSpec, boolean ifExists, boolean deleteData) + throws HCatException; + /** * List partitions by filter. * * @param dbName The database name. @@ -467,7 +487,24 @@ */ public abstract String getMessageBusTopicName(String dbName, String tableName) throws HCatException; + /** + * Get an iterator that iterates over a list of replication tasks needed to replicate all the + * events that have taken place for a given db/table. + * @param lastEventId : The last event id that was processed for this reader. The returned + * replication tasks will start from this point forward + * @param maxEvents : Maximum number of events to consider for generating the + * replication tasks. If < 1, then all available events will be considered. + * @param dbName : The database name for which we're interested in the events for. + * @param tableName : The table name for which we're interested in the events for - if null, + * then this function will behave as if it were running at a db level. + * @return an iterator over a list of replication events that can be processed one by one. + * @throws HCatException + */ + public abstract Iterator getReplicationTasks( + long lastEventId, int maxEvents, String dbName, String tableName) throws HCatException; + + /** * Get a list of notifications * @param lastEventId The last event id that was consumed by this reader. The returned * notifications will start at the next eventId available this eventId that Index: hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java =================================================================== --- hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java (revision 1673556) +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java (working copy) @@ -21,9 +21,11 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.Map; +import com.google.common.base.Function; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.commons.lang.StringUtils; @@ -63,6 +65,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hive.hcatalog.api.repl.HCatReplicationTaskIterator; +import org.apache.hive.hcatalog.api.repl.ReplicationTask; import org.apache.hive.hcatalog.common.HCatConstants; import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.common.HCatUtil; @@ -72,6 +76,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; + /** * The HCatClientHMSImpl is the Hive Metastore client based implementation of * HCatClient. 
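[Editor's sketch, not part of the patch] The two client-facing additions documented above (dropPartitions with an explicit deleteData flag, and getReplicationTasks) could be exercised roughly as below. This is a minimal usage sketch under stated assumptions: the database/table names ("mydb", "mytable"), the event window (from event id 0, at most 100 events) and the ReplicationApiSketch class are illustrative only; HCatClient.create(Configuration) and close() are the existing factory/cleanup calls on HCatClient.

    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Map;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hive.hcatalog.api.HCatClient;
    import org.apache.hive.hcatalog.api.repl.Command;
    import org.apache.hive.hcatalog.api.repl.ReplicationTask;

    public class ReplicationApiSketch {
      public static void main(String[] args) throws Exception {
        HCatClient client = HCatClient.create(new Configuration());
        try {
          // Drop every partition matching dt='20120101', tolerating its absence
          // (ifExists = true) while keeping the underlying data (deleteData = false).
          Map<String, String> partitionSpec = new HashMap<String, String>();
          partitionSpec.put("dt", "20120101");
          client.dropPartitions("mydb", "mytable", partitionSpec, true, false);

          // Turn notification events recorded after event id 0 (at most 100 of them)
          // into replication tasks for mydb.mytable.
          Iterator<ReplicationTask> tasks = client.getReplicationTasks(0, 100, "mydb", "mytable");
          while (tasks.hasNext()) {
            ReplicationTask task = tasks.next();
            if (!task.isActionable()) {
              continue; // nothing to do for this event type yet
            }
            for (Command cmd : task.getSrcWhCommands()) {
              // submit cmd.get() statements to a driver on the source warehouse
            }
            for (Command cmd : task.getDstWhCommands()) {
              // submit cmd.get() statements on the destination warehouse
            }
          }
        } finally {
          client.close();
        }
      }
    }

The deleteData flag appears aimed at exactly this kind of caller: replication tooling can drop partition metadata without discarding data files it still needs, while the existing four-argument overload keeps its old delete-the-data behaviour.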
@@ -567,33 +573,35 @@ && "TRUE".equalsIgnoreCase(table.getParameters().get("EXTERNAL")); } - private void dropPartitionsUsingExpressions(Table table, Map partitionSpec, boolean ifExists) - throws SemanticException, TException { + private void dropPartitionsUsingExpressions(Table table, Map partitionSpec, + boolean ifExists, boolean deleteData) + throws SemanticException, TException { LOG.info("HCatClient: Dropping partitions using partition-predicate Expressions."); ExprNodeGenericFuncDesc partitionExpression = new ExpressionBuilder(table, partitionSpec).build(); ObjectPair serializedPartitionExpression = new ObjectPair(partitionSpec.size(), Utilities.serializeExpressionToKryo(partitionExpression)); hmsClient.dropPartitions(table.getDbName(), table.getTableName(), Arrays.asList(serializedPartitionExpression), - !isExternal(table), // Delete data? - false, // Ignore Protection? - ifExists, // Fail if table doesn't exist? - false); // Need results back? + deleteData && !isExternal(table), // Delete data? + false, // Ignore Protection? + ifExists, // Fail if table doesn't exist? + false); // Need results back? } private void dropPartitionsIteratively(String dbName, String tableName, - Map partitionSpec, boolean ifExists) throws HCatException, TException { + Map partitionSpec, boolean ifExists, boolean deleteData) + throws HCatException, TException { LOG.info("HCatClient: Dropping partitions iteratively."); List partitions = hmsClient.listPartitionsByFilter(dbName, tableName, getFilterString(partitionSpec), (short) -1); for (Partition partition : partitions) { - dropPartition(partition, ifExists); + dropPartition(partition, ifExists, deleteData); } } @Override public void dropPartitions(String dbName, String tableName, - Map partitionSpec, boolean ifExists) + Map partitionSpec, boolean ifExists, boolean deleteData) throws HCatException { LOG.info("HCatClient dropPartitions(db=" + dbName + ",table=" + tableName + ", partitionSpec: ["+ partitionSpec + "])."); try { @@ -602,17 +610,17 @@ if (hiveConfig.getBoolVar(HiveConf.ConfVars.METASTORE_CLIENT_DROP_PARTITIONS_WITH_EXPRESSIONS)) { try { - dropPartitionsUsingExpressions(table, partitionSpec, ifExists); + dropPartitionsUsingExpressions(table, partitionSpec, ifExists, deleteData); } catch (SemanticException parseFailure) { LOG.warn("Could not push down partition-specification to back-end, for dropPartitions(). Resorting to iteration.", parseFailure); - dropPartitionsIteratively(dbName, tableName, partitionSpec, ifExists); + dropPartitionsIteratively(dbName, tableName, partitionSpec, ifExists, deleteData); } } else { // Not using expressions. 
- dropPartitionsIteratively(dbName, tableName, partitionSpec, ifExists); + dropPartitionsIteratively(dbName, tableName, partitionSpec, ifExists, deleteData); } } catch (NoSuchObjectException e) { throw new ObjectNotFoundException( @@ -627,10 +635,16 @@ } } - private void dropPartition(Partition partition, boolean ifExists) + @Override + public void dropPartitions(String dbName, String tableName, + Map partitionSpec, boolean ifExists) throws HCatException { + dropPartitions(dbName, tableName, partitionSpec, ifExists, true); + } + + private void dropPartition(Partition partition, boolean ifExists, boolean deleteData) throws HCatException, MetaException, TException { try { - hmsClient.dropPartition(partition.getDbName(), partition.getTableName(), partition.getValues()); + hmsClient.dropPartition(partition.getDbName(), partition.getTableName(), partition.getValues(), deleteData); } catch (NoSuchObjectException e) { if (!ifExists) { throw new ObjectNotFoundException( @@ -965,18 +979,27 @@ } @Override + public Iterator getReplicationTasks( + long lastEventId, int maxEvents, String dbName, String tableName) throws HCatException { + return new HCatReplicationTaskIterator(this,lastEventId,maxEvents,dbName,tableName); + } + + @Override public List getNextNotification(long lastEventId, int maxEvents, IMetaStoreClient.NotificationFilter filter) throws HCatException { try { - List events = new ArrayList(); NotificationEventResponse rsp = hmsClient.getNextNotification(lastEventId, maxEvents, filter); if (rsp != null && rsp.getEvents() != null) { - for (NotificationEvent event : rsp.getEvents()) { - events.add(new HCatNotificationEvent(event)); - } + return Lists.transform(rsp.getEvents(), new Function() { + @Override + public HCatNotificationEvent apply(@Nullable NotificationEvent notificationEvent) { + return new HCatNotificationEvent(notificationEvent); + } + }); + } else { + return new ArrayList(); } - return events; } catch (TException e) { throw new ConnectionFailureException("TException while getting notifications", e); } Index: hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatNotificationEvent.java =================================================================== --- hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatNotificationEvent.java (revision 1673556) +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatNotificationEvent.java (working copy) @@ -32,6 +32,8 @@ private String tableName; private String message; + public enum Scope { DB, TABLE, UNKNOWN }; + HCatNotificationEvent(NotificationEvent event) { eventId = event.getEventId(); eventTime = event.getEventTime(); @@ -45,6 +47,20 @@ return eventId; } + public Scope getEventScope() { + // Eventually, we want this to be a richer description of having + // a DB, TABLE, ROLE, etc scope. For now, we have a trivial impl + // of having only DB and TABLE scopes, as determined by whether + // or not the tableName is null. 
+ if (dbName != null){ + if (tableName != null){ + return Scope.TABLE; + } + return Scope.DB; + } + return Scope.UNKNOWN; + } + public int getEventTime() { return eventTime; } Index: hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/Command.java =================================================================== --- hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/Command.java (revision 0) +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/Command.java (working copy) @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.api.repl; + +import org.apache.hadoop.io.Writable; + +import java.util.List; + +/** + * Interface that abstracts the notion of one atomic command to execute. + * If the command does not execute and raises some exception, then Command + * provides a conditional to check if the operation is intended to be + * retriable - i.e. whether the command is considered idempotent. If it is, + * then the user could attempt to redo the particular command they were + * running. If not, then they can check another conditional to check + * if their action is undo-able. If undoable, then they can then attempt + * to undo the action by asking the command how to undo it. If not, they + * can then in turn act upon the exception in whatever manner they see + * fit (typically by raising an error). + * + * We also have two more methods that help cleanup of temporary locations + * used by this Command. cleanupLocationsPerRetry() provides a list of + * directories that are intended to be cleaned up every time this Command + * needs to be retried. cleanupLocationsAfterEvent() provides a list of + * directories that should be cleaned up after the event for which this + * Command is generated is successfully processed. + */ +public interface Command extends Writable { + List get(); + boolean isRetriable(); + boolean isUndoable(); + List getUndo(); + List cleanupLocationsPerRetry(); + List cleanupLocationsAfterEvent(); + long getEventId(); +} Index: hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/HCatReplicationTaskIterator.java =================================================================== --- hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/HCatReplicationTaskIterator.java (revision 0) +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/HCatReplicationTaskIterator.java (working copy) @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.api.repl; + +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.NotificationEvent; +import org.apache.hive.hcatalog.api.HCatClient; +import org.apache.hive.hcatalog.api.HCatNotificationEvent; +import org.apache.hive.hcatalog.common.HCatException; + +import java.util.Iterator; + +public class HCatReplicationTaskIterator implements Iterator{ + private Iterator notifIter = null; + + private class HCatReplicationTaskIteratorNotificationFilter implements IMetaStoreClient.NotificationFilter { + + private String dbName; + private String tableName; + public HCatReplicationTaskIteratorNotificationFilter(String dbName, String tableName){ + this.dbName = dbName; + this.tableName = tableName; + } + @Override + public boolean accept(NotificationEvent event) { + if (event == null){ + return false; // get rid of trivial case first, so that we can safely assume non-null + } + if (this.dbName == null){ + return true; // if our dbName is null, we're interested in all wh events + } + if (this.dbName.equalsIgnoreCase(event.getDbName())){ + if ( + (this.tableName == null) + // if our dbName is equal, but tableName is blank, we're interested in this db-level event + || (this.tableName.equalsIgnoreCase(event.getTableName())) + // table level event that matches us + ){ + return true; + } + } + return false; + } + } + + public HCatReplicationTaskIterator( + HCatClient hcatClient, long eventFrom, int maxEvents, String dbName, String tableName) throws HCatException { + + init(hcatClient,eventFrom,maxEvents, new HCatReplicationTaskIteratorNotificationFilter(dbName,tableName)); + } + + public HCatReplicationTaskIterator( + HCatClient hcatClient, long eventFrom, int maxEvents, + IMetaStoreClient.NotificationFilter filter) throws HCatException{ + init(hcatClient,eventFrom,maxEvents,filter); + } + private void init(HCatClient hcatClient, long eventFrom, int maxEvents, IMetaStoreClient.NotificationFilter filter) throws HCatException { + // Simple implementation for now, this will later expand to do DAG evaluation. 
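+    // next() turns each NotificationEvent from this iterator into a ReplicationTask via
+    // ReplicationTask.create(); the supplied NotificationFilter decides which warehouse
+    // events are considered at all.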
+ this.notifIter = hcatClient.getNextNotification(eventFrom, maxEvents,filter).iterator(); + } + + @Override + public boolean hasNext() { + return notifIter.hasNext(); + } + + @Override + public ReplicationTask next() { + return ReplicationTask.create(notifIter.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException("remove() not supported on HCatReplicationTaskIterator"); + } + + + +} + + + Index: hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/NoopReplicationTask.java =================================================================== --- hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/NoopReplicationTask.java (revision 0) +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/NoopReplicationTask.java (working copy) @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.api.repl; + +import org.apache.hive.hcatalog.api.HCatNotificationEvent; +import org.apache.hive.hcatalog.api.repl.commands.NoopCommand; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class is there to help testing, and to help initial development + * and will be the default Replication Task for under-development replication + * tasks to override. + * + * This is not intended to be a permanent class, and will likely move to the test + * package after initial implementation. 
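+ *
+ * Behaviourally (see the class body below), this task needs no staging directories, is
+ * always actionable, and returns a single NoopCommand carrying the triggering event id
+ * from both getSrcWhCommands() and getDstWhCommands().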
+ */ + +public class NoopReplicationTask extends ReplicationTask { + + List noopReturn = null; + + public NoopReplicationTask(HCatNotificationEvent event) { + super(event); + noopReturn = new ArrayList(); + noopReturn.add(new NoopCommand(event.getEventId())); + } + + @Override + public boolean needsStagingDirs() { + return false; + } + + @Override + public boolean isActionable(){ + return true; + } + + /** + * Returns a list of commands to send to a hive driver on the source warehouse + * @return a list of commands to send to a hive driver on the source warehouse + */ + @Override + public Iterable getSrcWhCommands() { + verifyActionable(); + return noopReturn; + } + + /** + * Returns a list of commands to send to a hive driver on the dest warehouse + * @return a list of commands to send to a hive driver on the dest warehouse + */ + @Override + public Iterable getDstWhCommands() { + verifyActionable(); + return noopReturn; + } + +} + Index: hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/ReplicationTask.java =================================================================== --- hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/ReplicationTask.java (revision 0) +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/ReplicationTask.java (working copy) @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.api.repl; + +import com.google.common.base.Function; +import org.apache.hive.hcatalog.api.HCatNotificationEvent; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.messaging.MessageFactory; + + +/** + * ReplicationTask captures the concept of what it'd take to replicate changes from + * one warehouse to another given a notification event that captures what changed. + */ +public abstract class ReplicationTask { + protected HCatNotificationEvent event; + protected StagingDirectoryProvider srcStagingDirProvider = null; + protected StagingDirectoryProvider dstStagingDirProvider = null; + protected Function tableNameMapping = null; + protected Function dbNameMapping = null; + + protected static MessageFactory messageFactory = MessageFactory.getInstance(); + + public interface Factory { + public ReplicationTask create(HCatNotificationEvent event); + } + + /** + * Dummy NoopFactory for testing, returns a NoopReplicationTask for all recognized events. + * Warning : this will eventually go away or move to the test section - it's intended only + * for integration testing purposes. 
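+ *
+ * As the code currently stands, ReplicationTask.create(event) routes every recognized
+ * event type through this factory, so callers get NoopReplicationTask instances back
+ * until a real factory (e.g. the planned EXIMFactory) replaces it.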
+ */ + public static class NoopFactory implements Factory { + @Override + public ReplicationTask create(HCatNotificationEvent event) { + // TODO : Java 1.7+ support using String with switches, but IDEs don't all seem to know that. + // If casing is fine for now. But we should eventually remove this. Also, I didn't want to + // create another enum just for this. + String eventType = event.getEventType(); + if (eventType.equals(HCatConstants.HCAT_CREATE_DATABASE_EVENT)) { + return new NoopReplicationTask(event); + } else if (eventType.equals(HCatConstants.HCAT_DROP_DATABASE_EVENT)) { + return new NoopReplicationTask(event); + } else if (eventType.equals(HCatConstants.HCAT_CREATE_TABLE_EVENT)) { + return new NoopReplicationTask(event); + } else if (eventType.equals(HCatConstants.HCAT_DROP_TABLE_EVENT)) { + return new NoopReplicationTask(event); + } else if (eventType.equals(HCatConstants.HCAT_ADD_PARTITION_EVENT)) { + return new NoopReplicationTask(event); + } else if (eventType.equals(HCatConstants.HCAT_DROP_PARTITION_EVENT)) { + return new NoopReplicationTask(event); + } else if (eventType.equals(HCatConstants.HCAT_ALTER_TABLE_EVENT)) { + return new NoopReplicationTask(event); + } else if (eventType.equals(HCatConstants.HCAT_ALTER_PARTITION_EVENT)) { + return new NoopReplicationTask(event); + } else if (eventType.equals(HCatConstants.HCAT_INSERT_EVENT)) { + return new NoopReplicationTask(event); + } else { + throw new IllegalStateException("Unrecognized Event type, no replication task available"); + } + } + } + + private static Factory factoryInstance = null; + private static Factory getFactoryInstance() { + if (factoryInstance == null){ + // TODO: Eventually, we'll have a bit here that looks at a config param to instantiate + // the appropriate factory, with EXIMFactory being the default - that allows + // others to implement their own ReplicationTask.Factory for other replication + // implementations. + // That addition will be brought in by the EXIMFactory patch. + factoryInstance = new NoopFactory(); + } + return factoryInstance; + } + + /** + * Factory method to return appropriate subtype of ReplicationTask for given event + * @param event HCatEventMessage returned by the notification subsystem + * @return corresponding ReplicationTask + */ + public static ReplicationTask create(HCatNotificationEvent event){ + if (event == null){ + throw new IllegalArgumentException("event should not be null"); + } + return getFactoryInstance().create(event); + } + + // Primary entry point is a factory method instead of ctor + // to allow for future ctor mutabulity in design + protected ReplicationTask(HCatNotificationEvent event) { + this.event = event; + } + + /** + * Returns the event that this ReplicationTask is attempting to replicate + * @return underlying event + */ + public HCatNotificationEvent getEvent(){ + return this.event; + } + + /** + * Returns true if the replication task in question needs to create staging + * directories to complete its operation. This will mean that you will need + * to copy these directories over to the destination warehouse for each + * source-destination warehouse pair. + * If this is true, you will need to call .withSrcStagingDirProvider(...) + * and .withDstStagingDirProvider(...) before this ReplicationTask is usable + */ + public abstract boolean needsStagingDirs(); + + /** + * Returns true if this ReplicationTask is prepared with all info it needs, and is + * ready to be used + */ + public boolean isActionable(){ + if (! 
this.needsStagingDirs()) { + return true; + } + if ((srcStagingDirProvider != null) && (dstStagingDirProvider != null)){ + return true; + } + return false; + } + + /** + * See {@link org.apache.hive.hcatalog.api.repl.StagingDirectoryProvider} + * @param srcStagingDirProvider Staging Directory Provider for the source warehouse + * @return this + */ + public ReplicationTask withSrcStagingDirProvider(StagingDirectoryProvider srcStagingDirProvider){ + this.srcStagingDirProvider = srcStagingDirProvider; + return this; + } + + /** + * See {@link org.apache.hive.hcatalog.api.repl.StagingDirectoryProvider} + * @param dstStagingDirProvider Staging Directory Provider for the destination warehouse + * @return this replication task + */ + public ReplicationTask withDstStagingDirProvider(StagingDirectoryProvider dstStagingDirProvider){ + this.dstStagingDirProvider = dstStagingDirProvider; + return this; + } + + /** + * Allows a user to specify a table name mapping, where the function provided maps the name of + * the table in the source warehouse to the name of the table in the dest warehouse. It is expected + * that if the mapping does not exist, it should return the same name sent in. Or, if the function + * throws an IllegalArgumentException, the ReplicationTask will use the same key sent in. + * That way, the default is that the destination table name is the same as the src table name. + * + * If you want to use a Map mapping instead of a Function, + * simply call this function as .withTableNameMapping(com.google.common.base.Functions.forMap(tableMap)) + * @param tableNameMapping + * @return this replication task + */ + public ReplicationTask withTableNameMapping(Function tableNameMapping){ + this.tableNameMapping = tableNameMapping; + return this; + } + + /** + * Allows a user to specify a db name mapping, where the function provided maps the name of + * the db in the source warehouse to the name of the db in the dest warehouse. It is expected + * that if the mapping does not exist, it should return the same name sent in. Or, if the function + * throws an IllegalArgumentException, the ReplicationTask will use the same key sent in. + * That way, the default is that the destination db name is the same as the src db name. + * + * If you want to use a Map mapping instead of a Function, + * simply call this function as .withDbNameMapping(com.google.common.base.Functions.forMap(dbMap)) + * @param dbNameMapping + * @return this replication task + */ + public ReplicationTask withDbNameMapping(Function dbNameMapping){ + this.dbNameMapping = dbNameMapping; + return this; + } + + protected void verifyActionable() { + if (!this.isActionable()){ + throw new IllegalStateException("actionable command on task called when ReplicationTask is still not actionable."); + } + } + + /** + * Returns an Iterable of Commands to send to a hive driver on the source warehouse + * + * If you *need* a List instead, you can use guava's + * ImmutableList.copyOf(iterable) or Lists.newArrayList(iterable) to + * get the underlying list, but this defeats the purpose of making this + * interface an Iterable rather than a List, since it is very likely + * that the number of Commands returned here will cause your process + * to run OOM. 
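+ *
+ * For example, a consumer that wants to stay streaming might do something like the
+ * following (an illustrative sketch only; "driver" stands for whatever mechanism the
+ * caller uses to run a single statement):
+ *   for (Command cmd : task.getSrcWhCommands()) {
+ *     for (String stmt : cmd.get()) {
+ *       driver.run(stmt);
+ *     }
+ *   }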
+ */ + abstract public Iterable getSrcWhCommands(); + + /** + * Returns an Iterable of Commands to send to a hive driver on the destination warehouse + * + * If you *need* a List instead, you can use guava's + * ImmutableList.copyOf(iterable) or Lists.newArrayList(iterable) to + * get the underlying list, but this defeats the purpose of making this + * interface an Iterable rather than a List, since it is very likely + * that the number of Commands returned here will cause your process + * to run OOM. + */ + abstract public Iterable getDstWhCommands(); + + protected void validateEventType(HCatNotificationEvent event, String allowedEventType) { + if (event == null || !allowedEventType.equals(event.getEventType())){ + throw new IllegalStateException(this.getClass().getName() + " valid only for " + + allowedEventType + " events."); + } + } +} + Index: hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/ReplicationUtils.java =================================================================== --- hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/ReplicationUtils.java (revision 0) +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/ReplicationUtils.java (working copy) @@ -0,0 +1,226 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.api.repl; + +import com.google.common.base.Function; +import com.google.common.base.Objects; +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.io.IOExceptionWithCause; +import org.apache.hive.hcatalog.api.HCatDatabase; +import org.apache.hive.hcatalog.api.HCatPartition; +import org.apache.hive.hcatalog.api.HCatTable; +import org.apache.hive.hcatalog.data.ReaderWriter; + +import javax.annotation.Nullable; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Map; + +public class ReplicationUtils { + + private final static String REPL_STATE_ID = "repl.last.id"; // TODO : define in ReplicationSpec, and point this to that once that's patched in. + + private ReplicationUtils(){ + // dummy private constructor, since this class is a collection of static utility methods. + } + + /** + * Gets the last known replication state of this db. This is + * applicable only if it is the destination of a replication + * and has had data replicated into it via imports previously. + * Defaults to 0. 
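+ *
+ * For example (illustrative only), a freshly created destination db that has no
+ * "repl.last.id" property yields 0, which a caller could pass as the eventFrom
+ * argument of HCatClient.getReplicationTasks(...) to start consuming events from
+ * the beginning.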
+ */ + public static long getLastReplicationId(HCatDatabase db){ + Map props = db.getProperties(); + if (props != null){ + if (props.containsKey(REPL_STATE_ID)){ + return Long.parseLong(props.get(REPL_STATE_ID)); + } + } + return 0l; // default is to return earliest possible state. + } + + + /** + * Gets the last known replication state of the provided table. This + * is applicable only if it is the destination of a replication + * and has had data replicated into it via imports previously. + * Defaults to 0. + */ + public static long getLastReplicationId(HCatTable tbl) { + Map tblProps = tbl.getTblProps(); + if (tblProps != null){ + if (tblProps.containsKey(REPL_STATE_ID)){ + return Long.parseLong(tblProps.get(REPL_STATE_ID)); + } + } + return 0l; // default is to return earliest possible state. + } + + /** + * Gets the last known replication state of the provided partition. + * This is applicable only if it is the destination of a replication + * and has had data replicated into it via imports previously. + * If that is not available, but parent table is provided, + * defaults to parent table's replication state. If that is also + * unknown, defaults to 0. + */ + public static long getLastReplicationId(HCatPartition ptn, @Nullable HCatTable parentTable) { + Map parameters = ptn.getParameters(); + if (parameters != null){ + if (parameters.containsKey(REPL_STATE_ID)){ + return Long.parseLong(parameters.get(REPL_STATE_ID)); + } + } + + if (parentTable != null){ + return getLastReplicationId(parentTable); + } + return 0l; // default is to return earliest possible state. + } + + /** + * Used to generate a unique key for a combination of given event id, dbname, + * tablename and partition keyvalues. This is used to feed in a name for creating + * staging directories for exports and imports. This should be idempotent given + * the same values, i.e. hashcode-like, but at the same time, be guaranteed to be + * different for every possible partition, while being "readable-ish". Basically, + * we concat the alphanumberic versions of all of the above, along with a hashcode + * of the db, tablename and ptn key-value pairs + */ + public static String getUniqueKey(long eventId, String db, String table, Map ptnDesc) { + StringBuilder sb = new StringBuilder(); + sb.append(eventId); + sb.append('.'); + sb.append(toStringWordCharsOnly(db)); + sb.append('.'); + sb.append(toStringWordCharsOnly(table)); + sb.append('.'); + sb.append(toStringWordCharsOnly(ptnDesc)); + sb.append('.'); + sb.append(Objects.hashCode(db, table, ptnDesc)); + return sb.toString(); + } + + /** + * Return alphanumeric(and '_') representation of a Map + * + */ + private static String toStringWordCharsOnly(Map map) { + if (map == null){ + return "null"; + } + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (Map.Entry e : map.entrySet()){ + if (!first){ + sb.append(','); + } + sb.append(toStringWordCharsOnly(e.getKey())); + sb.append('='); + sb.append(toStringWordCharsOnly(e.getValue())); + first = false; + } + return sb.toString(); + } + + /** + * Return alphanumeric(and '_') chars only of a string, lowercased + */ + public static String toStringWordCharsOnly(String s){ + return (s == null) ? "null" : s.replaceAll("[\\W]", "").toLowerCase(); + } + + /** + * Return a mapping from a given map function if available, and the key itself if not. 
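+ *
+ * For example (illustrative only), given a Guava map-backed function such as
+ * Functions.forMap(dbMap), mapIfMapAvailable("salesdb", mapping) returns
+ * dbMap.get("salesdb") when the key is present, and falls back to returning
+ * "salesdb" itself when Functions.forMap throws IllegalArgumentException for a
+ * missing key.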
+ */ + public static String mapIfMapAvailable(String s, Function mapping){ + try { + return mapping.apply(s); + } catch (IllegalArgumentException iae){ + // The key wasn't present in the mapping, return the key itself, since no mapping was available + return s; + } + } + + public static String partitionDescriptor(Map ptnDesc) { + StringBuilder sb = new StringBuilder(); + if ((ptnDesc != null) && (!ptnDesc.isEmpty())){ + boolean first = true; + sb.append(" PARTITION ("); + for (Map.Entry e : ptnDesc.entrySet()){ + if (!first){ + sb.append(", "); + } else { + first = false; + } + sb.append(e.getKey()); // TODO : verify if any quoting is needed for keys + sb.append('='); + sb.append('"'); + sb.append(e.getValue()); // TODO : verify if any escaping is needed for values + sb.append('"'); + } + sb.append(')'); + } + return sb.toString(); + } + + /** + * Command implements Writable, but that's not terribly easy to use compared + * to String, even if it plugs in easily into the rest of Hadoop. Provide + * utility methods to easily serialize and deserialize Commands + * + * serializeCommand returns a base64 String representation of given command + */ + public static String serializeCommand(Command command) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutput dataOutput = new DataOutputStream(baos); + ReaderWriter.writeDatum(dataOutput,command.getClass().getName()); + command.write(dataOutput); + return Base64.encodeBase64URLSafeString(baos.toByteArray()); + } + + /** + * Command implements Writable, but that's not terribly easy to use compared + * to String, even if it plugs in easily into the rest of Hadoop. Provide + * utility methods to easily serialize and deserialize Commands + * + * deserializeCommand instantiates a concrete Command and initializes it, + * given a base64 String representation of it. + */ + public static Command deserializeCommand(String s) throws IOException { + DataInput dataInput = new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(s))); + String clazz = (String) ReaderWriter.readDatum(dataInput); + Command cmd; + try { + cmd = (Command)Class.forName(clazz).newInstance(); + } catch (Exception e) { + throw new IOExceptionWithCause("Error instantiating class "+clazz,e); + } + cmd.readFields(dataInput); + return cmd; + } + +} Index: hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/StagingDirectoryProvider.java =================================================================== --- hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/StagingDirectoryProvider.java (revision 0) +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/StagingDirectoryProvider.java (working copy) @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.api.repl; + +/** + * Interface for a client to provide a Staging Directory specification + */ +public interface StagingDirectoryProvider { + + /** + * Return a temporary staging directory for a given key + * @param key key for the directory, usually a name of a partition + * Note that when overriding this method, no guarantees are made about the + * contents of the key, other than that is unique per partition. + * @return A parth specification to use as a temporary staging directory + */ + String getStagingDirectory(String key); + + /** + * Trivial implementation of this interface - creates + */ + public class TrivialImpl implements StagingDirectoryProvider { + + String prefix = null; + + /** + * Trivial implementation of StagingDirectoryProvider which takes a temporary directory + * and creates directories inside that for each key. Note that this is intended as a + * trivial implementation, and if any further "advanced" behaviour is desired, + * it is better that the user roll their own. + * + * @param base temp directory inside which other tmp dirs are created + * @param separator path separator. Usually should be "/" + */ + public TrivialImpl(String base,String separator){ + this.prefix = base + separator; + } + + @Override + public String getStagingDirectory(String key) { + return prefix + key; + } + } +} Index: hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/commands/NoopCommand.java =================================================================== --- hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/commands/NoopCommand.java (revision 0) +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/repl/commands/NoopCommand.java (working copy) @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.api.repl.commands; + + +import org.apache.hive.hcatalog.api.repl.Command; +import org.apache.hive.hcatalog.data.ReaderWriter; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * This class is there to help testing, and to help initial development + * and will be the default Command for NoopReplicationTask + * + * This is not intended to be a permanent class, and will likely move to the test + * package after initial implementation. 
+ */ + +public class NoopCommand implements Command { + private long eventId; + + public NoopCommand(){ + // trivial ctor to support Writable reflections instantiation + // do not expect to use this object as-is, unless you call + // readFields after using this ctor + } + + public NoopCommand(long eventId){ + this.eventId = eventId; + } + + @Override + public List get() { + return new ArrayList(); + } + + @Override + public boolean isRetriable() { + return true; + } + + @Override + public boolean isUndoable() { + return true; + } + + @Override + public List getUndo() { + return new ArrayList(); + } + + @Override + public List cleanupLocationsPerRetry() { + return new ArrayList(); + } + + @Override + public List cleanupLocationsAfterEvent() { + return new ArrayList(); + } + + @Override + public long getEventId() { + return eventId; + } + + @Override + public void write(DataOutput dataOutput) throws IOException { + ReaderWriter.writeDatum(dataOutput, Long.valueOf(eventId)); + } + + @Override + public void readFields(DataInput dataInput) throws IOException { + eventId = ((Long)ReaderWriter.readDatum(dataInput)).longValue(); + } +} + Index: hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java =================================================================== --- hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java (revision 1673556) +++ hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java (working copy) @@ -18,18 +18,24 @@ */ package org.apache.hive.hcatalog.api; +import java.io.IOException; import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; import java.util.EnumSet; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; +import com.google.common.base.Function; +import com.google.common.collect.Iterables; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStore; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.NotificationEvent; import org.apache.hadoop.hive.metastore.api.PartitionEventType; import org.apache.hadoop.hive.ql.WindowsPathUtil; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; @@ -42,12 +48,17 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe; import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hive.hcatalog.api.repl.Command; +import org.apache.hive.hcatalog.api.repl.ReplicationTask; +import org.apache.hive.hcatalog.api.repl.ReplicationUtils; +import org.apache.hive.hcatalog.api.repl.StagingDirectoryProvider; import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; import org.apache.hive.hcatalog.common.HCatConstants; import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type; import org.apache.hive.hcatalog.NoExitSecurityManager; +import org.apache.hive.hcatalog.listener.DbNotificationListener; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -63,6 +74,8 @@ import org.apache.hadoop.util.Shell; +import javax.annotation.Nullable; + public class TestHCatClient { private static final Logger LOG = LoggerFactory.getLogger(TestHCatClient.class); private 
static final String msPort = "20101"; @@ -71,6 +84,8 @@ private static final String replicationTargetHCatPort = "20102"; private static HiveConf replicationTargetHCatConf; private static SecurityManager securityManager; + private static boolean useExternalMS = false; + private static boolean useExternalMSForReplication = false; private static class RunMS implements Runnable { @@ -101,18 +116,28 @@ @AfterClass public static void tearDown() throws Exception { - LOG.info("Shutting down metastore."); - System.setSecurityManager(securityManager); + if (!useExternalMS) { + LOG.info("Shutting down metastore."); + System.setSecurityManager(securityManager); + } } @BeforeClass public static void startMetaStoreServer() throws Exception { hcatConf = new HiveConf(TestHCatClient.class); + String metastoreUri = System.getProperty(HiveConf.ConfVars.METASTOREURIS.varname); + if (metastoreUri != null) { + hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, metastoreUri); + useExternalMS = true; + return; + } if (Shell.WINDOWS) { WindowsPathUtil.convertPathsFromWindowsToHdfs(hcatConf); } + System.setProperty(HiveConf.ConfVars.METASTORE_EVENT_LISTENERS.varname, + DbNotificationListener.class.getName()); // turn on db notification listener on metastore Thread t = new Thread(new RunMS(msPort)); t.start(); Thread.sleep(10000); @@ -162,8 +187,12 @@ assertTrue(testDb.getProperties().size() == 0); String warehouseDir = System .getProperty("test.warehouse.dir", "/user/hive/warehouse"); - String expectedDir = warehouseDir.replaceFirst("pfile:///", "pfile:/"); - assertEquals(expectedDir + "/" + db + ".db", testDb.getLocation()); + if (useExternalMS) { + assertTrue(testDb.getLocation().matches(".*" + "/" + db + ".db")); + } else { + String expectedDir = warehouseDir.replaceFirst("pfile:///", "pfile:/"); + assertEquals(expectedDir + "/" + db + ".db", testDb.getLocation()); + } ArrayList cols = new ArrayList(); cols.add(new HCatFieldSchema("id", Type.INT, "id comment")); cols.add(new HCatFieldSchema("value", Type.STRING, "value comment")); @@ -213,7 +242,7 @@ assertEquals("checking " + serdeConstants.SERIALIZATION_NULL_FORMAT, Character.toString('\006'), table2.getSerdeParams().get(serdeConstants.SERIALIZATION_NULL_FORMAT)); - assertEquals((expectedDir + "/" + db + ".db/" + tableTwo).toLowerCase(), table2.getLocation().toLowerCase()); + assertTrue(table2.getLocation().toLowerCase().matches(".*" + ("/" + db + ".db/" + tableTwo).toLowerCase())); HCatCreateTableDesc tableDesc3 = HCatCreateTableDesc.create(db, tableThree, cols).fileFormat("orcfile").build(); @@ -372,7 +401,7 @@ .ifNotExists(true).location("/tmp/" + dbName).build(); client.createDatabase(dbDesc); HCatDatabase newDB = client.getDatabase(dbName); - assertTrue(newDB.getLocation().equalsIgnoreCase("file:/tmp/" + dbName)); + assertTrue(newDB.getLocation().matches(".*/tmp/" + dbName)); client.close(); } @@ -793,6 +822,113 @@ } /** + * Test for event-based replication scenario + * + * Does not test if replication actually happened, merely tests if we're able to consume a repl task + * iter appropriately, calling all the functions expected of the interface, without errors. 
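+ *
+ * The events consumed here are produced by the DbNotificationListener that
+ * startMetaStoreServer() registers via METASTORE_EVENT_LISTENERS; without such a
+ * listener on the metastore, getReplicationTasks() would have no events to iterate.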
+ */ + @Test + public void testReplicationTaskIter() throws Exception { + + HCatClient sourceMetastore = HCatClient.create(new Configuration(hcatConf)); + + List notifs = sourceMetastore.getNextNotification( + 0, 0, new IMetaStoreClient.NotificationFilter() { + @Override + public boolean accept(NotificationEvent event) { + return true; + } + }); + for(HCatNotificationEvent n : notifs){ + LOG.info("notif from dblistener:" + n.getEventId() + + ":" + n.getEventTime() + ",t:" + n.getEventType() + ",o:" + n.getDbName() + "." + n.getTableName()); + } + + Iterator taskIter = sourceMetastore.getReplicationTasks(0, 0, "mydb", null); + while(taskIter.hasNext()){ + ReplicationTask task = taskIter.next(); + HCatNotificationEvent n = task.getEvent(); + LOG.info("notif from tasks:" + n.getEventId() + + ":" + n.getEventTime() + ",t:" + n.getEventType() + ",o:" + n.getDbName() + "." + n.getTableName() + + ",s:" + n.getEventScope()); + LOG.info("task :" + task.getClass().getName()); + if (task.needsStagingDirs()){ + StagingDirectoryProvider provider = new StagingDirectoryProvider() { + @Override + public String getStagingDirectory(String key) { + LOG.info("getStagingDirectory(" + key + ") called!"); + return "/tmp/" + key.replaceAll(" ","_"); + } + }; + task + .withSrcStagingDirProvider(provider) + .withDstStagingDirProvider(provider); + } + if (task.isActionable()){ + LOG.info("task was actionable!"); + Function commandDebugPrinter = new Function() { + @Override + public String apply(@Nullable Command cmd) { + StringBuilder sb = new StringBuilder(); + String serializedCmd = null; + try { + serializedCmd = ReplicationUtils.serializeCommand(cmd); + } catch (IOException e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + sb.append("SERIALIZED:"+serializedCmd+"\n"); + Command command = null; + try { + command = ReplicationUtils.deserializeCommand(serializedCmd); + } catch (IOException e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + sb.append("CMD:[" + command.getClass().getName() + "]\n"); + sb.append("EVENTID:[" +command.getEventId()+"]\n"); + for (String s : command.get()) { + sb.append("CMD:" + s); + sb.append("\n"); + } + sb.append("Retriable:" + command.isRetriable() + "\n"); + sb.append("Undoable:" + command.isUndoable() + "\n"); + if (command.isUndoable()) { + for (String s : command.getUndo()) { + sb.append("UNDO:" + s); + sb.append("\n"); + } + } + List locns = command.cleanupLocationsPerRetry(); + sb.append("cleanupLocationsPerRetry entries :" + locns.size()); + for (String s : locns){ + sb.append("RETRY_CLEANUP:"+s); + sb.append("\n"); + } + locns = command.cleanupLocationsAfterEvent(); + sb.append("cleanupLocationsAfterEvent entries :" + locns.size()); + for (String s : locns){ + sb.append("AFTER_EVENT_CLEANUP:"+s); + sb.append("\n"); + } + return sb.toString(); + } + }; + LOG.info("On src:"); + for (String s : Iterables.transform(task.getSrcWhCommands(), commandDebugPrinter)){ + LOG.info(s); + } + LOG.info("On dest:"); + for (String s : Iterables.transform(task.getDstWhCommands(), commandDebugPrinter)){ + LOG.info(s); + } + } else { + LOG.info("task was not actionable."); + } + } + } + + /** * Test for detecting schema-changes for an HCatalog table, across 2 different HCat instances. * A table is created with the same schema on 2 HCat instances. The table-schema is modified on the source HCat * instance (columns, I/O formats, SerDe definitions, etc.). 
The table metadata is compared between source Index: hcatalog/webhcat/svr/src/main/config/webhcat-default.xml =================================================================== --- hcatalog/webhcat/svr/src/main/config/webhcat-default.xml (revision 1673556) +++ hcatalog/webhcat/svr/src/main/config/webhcat-default.xml (working copy) @@ -39,7 +39,7 @@ templeton.libjars - ${env.TEMPLETON_HOME}/share/webhcat/svr/lib/zookeeper-3.4.3.jar + ${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar Jars to add to the classpath. @@ -106,10 +106,24 @@ templeton.hive.path hive-0.11.0.tar.gz/hive-0.11.0/bin/hive - The path to the Hive executable. + The path to the Hive executable. Applies only if templeton.hive.archive is defined. + templeton.hive.extra.files + + The resources in this list will be localized to the node running LaunchMapper and added to HADOOP_CLASSPATH + before launching the 'hive' command. If the path /foo/bar is a directory, the contents of the entire dir will be localized + and ./bar/* will be added to HADOOP_CLASSPATH. Note that since classpath processing does not recurse into subdirectories, + the paths in this property may be overlapping. For example, to run Hive on Tez jobs, 3 items need to be localized: + /tez-client/conf/tez-site.xml,/tez-client/,/tez-client/lib. In this example, "./tez-site.xml:./tez-client/*:./lib/*" will be added to + HADOOP_CLASSPATH. This can be used to specify config files, Tez artifacts, etc. This list is passed to the -files option of the hadoop jar command, so + each path is interpreted by the Generic Options Parser. Each path can be a local or an HDFS path. + + + + templeton.hive.home hive-0.14.0-SNAPSHOT-bin.tar.gz/hive-0.14.0-SNAPSHOT-bin @@ -197,7 +211,33 @@ + + + templeton.exec.envs HADOOP_PREFIX,HADOOP_HOME,JAVA_HOME,HIVE_HOME The environment variables passed through to exec. 
Index: hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java =================================================================== --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java (revision 1673556) +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java (working copy) @@ -35,7 +35,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.SystemVariables; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.VersionInfo; import org.apache.hive.hcatalog.templeton.tool.JobState; @@ -104,6 +103,8 @@ public static final String HIVE_ARCHIVE_NAME = "templeton.hive.archive"; public static final String HIVE_PATH_NAME = "templeton.hive.path"; public static final String MAPPER_MEMORY_MB = "templeton.mapper.memory.mb"; + public static final String MR_AM_MEMORY_MB = "templeton.mr.am.memory.mb"; + /** * see webhcat-default.xml */ @@ -130,6 +131,8 @@ public static final String OVERRIDE_JARS_ENABLED = "templeton.override.enabled"; public static final String TEMPLETON_CONTROLLER_MR_CHILD_OPTS = "templeton.controller.mr.child.opts"; + public static final String TEMPLETON_CONTROLLER_MR_AM_JAVA_OPTS + = "templeton.controller.mr.am.java.opts"; public static final String KERBEROS_SECRET = "templeton.kerberos.secret"; public static final String KERBEROS_PRINCIPAL = "templeton.kerberos.principal"; @@ -148,7 +151,14 @@ = "mapred.map.tasks.speculative.execution"; public static final String HADOOP_CHILD_JAVA_OPTS = "mapred.child.java.opts"; public static final String HADOOP_MAP_MEMORY_MB = "mapreduce.map.memory.mb"; + public static final String HADOOP_MR_AM_JAVA_OPTS = "yarn.app.mapreduce.am.command-opts"; + public static final String HADOOP_MR_AM_MEMORY_MB = "yarn.app.mapreduce.am.resource.mb"; public static final String UNIT_TEST_MODE = "templeton.unit.test.mode"; + /** + * comma-separated list of artifacts to add to HADOOP_CLASSPATH evn var in + * LaunchMapper before launching Hive command + */ + public static final String HIVE_EXTRA_FILES = "templeton.hive.extra.files"; private static final Log LOG = LogFactory.getLog(AppConfig.class); @@ -313,7 +323,13 @@ public String controllerMRChildOpts() { return get(TEMPLETON_CONTROLLER_MR_CHILD_OPTS); } + public String controllerAMChildOpts() { + return get(TEMPLETON_CONTROLLER_MR_AM_JAVA_OPTS); + } public String mapperMemoryMb() { return get(MAPPER_MEMORY_MB); } + public String amMemoryMb() { + return get(MR_AM_MEMORY_MB); + } /** * @see #HIVE_PROPS_NAME Index: hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java =================================================================== --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java (revision 1673556) +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java (working copy) @@ -27,6 +27,7 @@ import java.util.Map; import org.apache.commons.exec.ExecuteException; +import org.apache.hadoop.fs.Path; import org.apache.hive.hcatalog.templeton.tool.JobSubmissionConstants; import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob; import org.apache.hive.hcatalog.templeton.tool.TempletonUtils; @@ -117,7 +118,7 @@ private List makeBasicArgs(String execute, String srcFile, String otherFiles, String statusdir, String completedUrl, boolean enablelog) - throws URISyntaxException, 
FileNotFoundException, IOException, + throws URISyntaxException, IOException, InterruptedException { ArrayList args = new ArrayList(); @@ -142,6 +143,30 @@ args.add(appConf.hiveArchive()); } + //ship additional artifacts, for example for Tez + String extras = appConf.get(AppConfig.HIVE_EXTRA_FILES); + if(extras != null && extras.length() > 0) { + boolean foundFiles = false; + for(int i = 0; i < args.size(); i++) { + if(FILES.equals(args.get(i))) { + String value = args.get(i + 1); + args.set(i + 1, value + "," + extras); + foundFiles = true; + } + } + if(!foundFiles) { + args.add(FILES); + args.add(extras); + } + String[] extraFiles = appConf.getStrings(AppConfig.HIVE_EXTRA_FILES); + StringBuilder extraFileNames = new StringBuilder(); + //now tell LaunchMapper which files it should add to HADOOP_CLASSPATH + for(String file : extraFiles) { + Path p = new Path(file); + extraFileNames.append(p.getName()).append(","); + } + addDef(args, JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS, extraFileNames.toString()); + } return args; } } Index: hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java =================================================================== --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java (revision 1673556) +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java (working copy) @@ -28,6 +28,10 @@ * http://hadoop.apache.org/docs/r1.0.4/commands_manual.html#Generic+Options */ public static final String ARCHIVES = "-archives"; + /** + * http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/CommandsManual.html#Generic_Options + */ + public static final String FILES = "-files"; protected AppConfig appConf; Index: hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java =================================================================== --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java (revision 1673556) +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java (working copy) @@ -31,6 +31,12 @@ public static final String EXIT_FNAME = "exit"; public static final int WATCHER_TIMEOUT_SECS = 10; public static final int KEEP_ALIVE_MSEC = 60 * 1000; + /** + * A comma-separated list of files to be added to HADOOP_CLASSPATH in + * {@link org.apache.hive.hcatalog.templeton.tool.LaunchMapper}. Used to localize additional + * artifacts for job submission requests. 
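+ *
+ * For example (illustrative only), a value of "tez-site.xml,tez-client" would lead
+ * handleHadoopClasspathExtras() in LaunchMapper to append roughly
+ * "tez-site.xml:tez-client/*" to HADOOP_CLASSPATH, since localized directories get a
+ * File.separator + "*" suffix and entries are joined with File.pathSeparator.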
+ */ + public static final String HADOOP_CLASSPATH_EXTRAS = "templeton.hadoop.classpath.extras"; /* * The = sign in the string for TOKEN_FILE_ARG_PLACEHOLDER is required because * org.apache.hadoop.util.GenericOptionsParser.preProcessForWindows() prepares Index: hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java =================================================================== --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java (revision 1673556) +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java (working copy) @@ -21,6 +21,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.classification.InterfaceAudience; @@ -33,7 +34,6 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; -import org.apache.hive.hcatalog.templeton.AppConfig; import org.apache.hive.hcatalog.templeton.BadParam; import org.apache.hive.hcatalog.templeton.LauncherDelegator; @@ -115,6 +115,32 @@ } } } + private static void handleHadoopClasspathExtras(Configuration conf, Map env) + throws IOException { + if(!TempletonUtils.isset(conf.get(JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS))) { + return; + } + LOG.debug(HADOOP_CLASSPATH_EXTRAS + "=" + conf.get(HADOOP_CLASSPATH_EXTRAS)); + String[] files = conf.getStrings(HADOOP_CLASSPATH_EXTRAS); + StringBuilder paths = new StringBuilder(); + FileSystem fs = FileSystem.getLocal(conf);//these have been localized already + for(String f : files) { + Path p = new Path(f); + FileStatus fileStatus = fs.getFileStatus(p); + paths.append(f); + if(fileStatus.isDirectory()) { + paths.append(File.separator).append("*"); + } + paths.append(File.pathSeparator); + } + paths.setLength(paths.length() - 1); + if(TempletonUtils.isset(System.getenv("HADOOP_CLASSPATH"))) { + env.put("HADOOP_CLASSPATH", System.getenv("HADOOP_CLASSPATH") + File.pathSeparator + paths); + } + else { + env.put("HADOOP_CLASSPATH", paths.toString()); + } + } protected Process startJob(Context context, String user, String overrideClasspath) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); @@ -135,6 +161,7 @@ Map env = TempletonUtils.hadoopUserEnv(user, overrideClasspath); handlePigEnvVars(conf, env); handleSqoop(conf, env); + handleHadoopClasspathExtras(conf, env); List jarArgsList = new LinkedList(Arrays.asList(jarArgs)); handleTokenFile(jarArgsList, JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER, "mapreduce.job.credentials.binary"); handleTokenFile(jarArgsList, JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER_TEZ, "tez.credentials.path"); Index: hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java =================================================================== --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java (revision 1673556) +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java (working copy) @@ -18,9 +18,7 @@ */ package org.apache.hive.hcatalog.templeton.tool; -import java.io.File; import java.io.IOException; -import java.net.URI; import 
java.security.PrivilegedExceptionAction; import java.util.Arrays; @@ -28,13 +26,9 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobClient; @@ -47,7 +41,6 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Tool; import org.apache.hive.hcatalog.templeton.AppConfig; -import org.apache.hive.hcatalog.templeton.Main; import org.apache.hive.hcatalog.templeton.SecureProxySupport; import org.apache.hive.hcatalog.templeton.UgiFactory; import org.apache.thrift.TException; @@ -114,6 +107,15 @@ if(memoryMb != null && memoryMb.length() != 0) { conf.set(AppConfig.HADOOP_MAP_MEMORY_MB, memoryMb); } + String amMemoryMB = appConf.amMemoryMb(); + if (amMemoryMB != null && !amMemoryMB.isEmpty()) { + conf.set(AppConfig.HADOOP_MR_AM_MEMORY_MB, amMemoryMB); + } + String amJavaOpts = appConf.controllerAMChildOpts(); + if (amJavaOpts != null && !amJavaOpts.isEmpty()) { + conf.set(AppConfig.HADOOP_MR_AM_JAVA_OPTS, amJavaOpts); + } + String user = UserGroupInformation.getCurrentUser().getShortUserName(); conf.set("user.name", user); Job job = new Job(conf); Index: hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java =================================================================== --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java (revision 1673556) +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java (working copy) @@ -76,32 +76,31 @@ } } /** - * Print files and directories in current directory. Will list files in the sub-directory (only 1 level deep) - * time honored tradition in WebHCat of borrowing from Oozie + * Print files and directories in current {@code dir}. 
*/ - private static void printContentsOfDir(String dir) { + private static StringBuilder printContentsOfDir(String dir, int depth, StringBuilder sb) { + StringBuilder indent = new StringBuilder(); + for(int i = 0; i < depth; i++) { + indent.append("--"); + } File folder = new File(dir); - StringBuilder sb = new StringBuilder("Files in '").append(dir).append("' dir:").append(folder.getAbsolutePath()).append('\n'); + sb.append(indent).append("Files in '").append(dir).append("' dir:").append(folder.getAbsolutePath()).append('\n'); File[] listOfFiles = folder.listFiles(); + if(listOfFiles == null) { + return sb; + } for (File fileName : listOfFiles) { if (fileName.isFile()) { - sb.append("File: ").append(fileName.getName()).append('\n'); + sb.append(indent).append("File: ").append(fileName.getName()).append('\n'); } else if (fileName.isDirectory()) { - sb.append("Dir: ").append(fileName.getName()).append('\n'); - File subDir = new File(fileName.getName()); - File[] moreFiles = subDir.listFiles(); - for (File subFileName : moreFiles) { - if (subFileName.isFile()) { - sb.append("--File: ").append(subFileName.getName()).append('\n'); - } - else if (subFileName.isDirectory()) { - sb.append("--Dir: ").append(subFileName.getName()).append('\n'); - } - } + printContentsOfDir(fileName.getName(), depth+1, sb); } } - LOG.info(sb.toString()); + return sb; } + private static void printContentsOfDir(String dir) { + LOG.info(printContentsOfDir(dir, 0, new StringBuilder()).toString()); + } } Index: itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java =================================================================== --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java (revision 0) +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java (working copy) @@ -0,0 +1,193 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hive.benchmark.vectorization; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColDivideDoubleColumn; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.util.Random; +import java.util.concurrent.TimeUnit; + +@State(Scope.Benchmark) +public class VectorizationBench { + /** + * This test measures the performance for vectorization. + *

+ * This test uses the JMH framework for benchmarking. + * You may execute this benchmark tool using the JMH command line in different ways: + *

+ * To use the settings shown in the main() function, use: + * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.vectorization.VectorizationBench + *

+ * To use the default settings used by JMH, use: + * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.vectorization VectorizationBench + *

+ * To specify different parameters, use: + * - This command will use 10 warm-up iterations, 5 test iterations, and 2 forks, and it will + * display the Average Time (avgt) in microseconds (us). + * - Benchmark mode. Available modes are: + * [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, All/all] + * - Output time unit. Available time units are: [m, s, ms, us, ns]. + *

+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.vectorization VectorizationBench + * -wi 10 -i 5 -f 2 -bm avgt -tu us + */ + private static LongColumnVector longColumnVector = new LongColumnVector(); + private static LongColumnVector dupLongColumnVector = new LongColumnVector(); + private static DoubleColumnVector doubleColumnVector = new DoubleColumnVector(); + private static DoubleColumnVector dupDoubleColumnVector = new DoubleColumnVector(); + + @BenchmarkMode(Mode.AverageTime) + @Fork(1) + @State(Scope.Thread) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + public static abstract class AbstractExpression { + protected VectorExpression expression; + protected VectorizedRowBatch rowBatch; + + protected VectorizedRowBatch buildRowBatch(ColumnVector output, int colNum, ColumnVector... + cols) { + VectorizedRowBatch rowBatch = new VectorizedRowBatch(colNum + 1); + for (int i = 0; i < cols.length; i++) { + rowBatch.cols[i] = cols[i]; + } + rowBatch.cols[colNum] = output; + return rowBatch; + } + + @Setup + public abstract void setup(); + + @Benchmark + @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS) + @Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS) + public void bench() { + expression.evaluate(rowBatch); + } + } + + public static class DoubleAddDoubleExpr extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector, + dupDoubleColumnVector); + expression = new DoubleColAddDoubleColumn(0, 1, 2); + } + } + + public static class LongAddLongExpr extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, longColumnVector, dupLongColumnVector); + expression = new LongColAddLongColumn(0, 1, 2); + } + } + + public static class LongAddDoubleExpr extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, longColumnVector, doubleColumnVector); + expression = new LongColAddDoubleColumn(0, 1, 2); + } + } + + public static class DoubleAddLongExpr extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector, longColumnVector); + expression = new DoubleColAddLongColumn(0, 1, 2); + } + } + + public static class DoubleDivideDoubleExpr extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector, + dupDoubleColumnVector); + expression = new DoubleColDivideDoubleColumn(0, 1, 2); + } + } + + public static class LongDivideLongExpr extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, longColumnVector, + dupLongColumnVector); + expression = new LongColDivideLongColumn(0, 1, 2); + } + } + + public static class DoubleDivideLongExpr extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector, + longColumnVector); + expression = new DoubleColDivideLongColumn(0, 1, 2); + } + } + + public static class LongDivideDoubleExpr extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, longColumnVector, + doubleColumnVector); + expression = new LongColDivideDoubleColumn(0, 1, 2); + } + } + + @Setup(Level.Trial) + public void initialColumnVectors() { + Random random = new Random(); + + 
dupLongColumnVector.fill(random.nextLong()); + dupDoubleColumnVector.fill(random.nextDouble()); + for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { + doubleColumnVector.vector[i] = random.nextDouble(); + longColumnVector.vector[i] = random.nextLong(); + } + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder().include(".*" + VectorizationBench.class.getSimpleName() + + ".*").build(); + new Runner(opt).run(); + } +} \ No newline at end of file Index: itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestHiveAuthFactory.java =================================================================== --- itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestHiveAuthFactory.java (revision 1673556) +++ itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestHiveAuthFactory.java (working copy) @@ -41,26 +41,48 @@ } /** - * Verify that delegation token manager is started with no exception + * Verify that delegation token manager is started with no exception for MemoryTokenStore * @throws Exception */ @Test - public void testStartTokenManager() throws Exception { + public void testStartTokenManagerForMemoryTokenStore() throws Exception { hiveConf.setVar(ConfVars.HIVE_SERVER2_AUTHENTICATION, HiveAuthFactory.AuthTypes.KERBEROS.getAuthName()); String principalName = miniHiveKdc.getFullHiveServicePrincipal(); System.out.println("Principal: " + principalName); - + hiveConf.setVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL, principalName); String keyTabFile = miniHiveKdc.getKeyTabFile(miniHiveKdc.getHiveServicePrincipal()); System.out.println("keyTabFile: " + keyTabFile); Assert.assertNotNull(keyTabFile); hiveConf.setVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB, keyTabFile); - System.out.println("rawStoreClassName =" + hiveConf.getVar(ConfVars.METASTORE_RAW_STORE_IMPL)); + HiveAuthFactory authFactory = new HiveAuthFactory(hiveConf); + Assert.assertNotNull(authFactory); + Assert.assertEquals("org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory", + authFactory.getAuthTransFactory().getClass().getName()); + } + /** + * Verify that delegation token manager is started with no exception for DBTokenStore + * @throws Exception + */ + @Test + public void testStartTokenManagerForDBTokenStore() throws Exception { + hiveConf.setVar(ConfVars.HIVE_SERVER2_AUTHENTICATION, HiveAuthFactory.AuthTypes.KERBEROS.getAuthName()); + String principalName = miniHiveKdc.getFullHiveServicePrincipal(); + System.out.println("Principal: " + principalName); + + hiveConf.setVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL, principalName); + String keyTabFile = miniHiveKdc.getKeyTabFile(miniHiveKdc.getHiveServicePrincipal()); + System.out.println("keyTabFile: " + keyTabFile); + Assert.assertNotNull(keyTabFile); + hiveConf.setVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB, keyTabFile); + + hiveConf.setVar(ConfVars.METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS, "org.apache.hadoop.hive.thrift.DBTokenStore"); + HiveAuthFactory authFactory = new HiveAuthFactory(hiveConf); Assert.assertNotNull(authFactory); - Assert.assertEquals("org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory", + Assert.assertEquals("org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server$TUGIAssumingTransportFactory", authFactory.getAuthTransFactory().getClass().getName()); } } Index: itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreEventListener.java 
=================================================================== --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreEventListener.java (revision 1673556) +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreEventListener.java (working copy) @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; +import com.google.common.collect.Lists; import junit.framework.TestCase; import org.apache.hadoop.hive.cli.CliSessionState; @@ -296,7 +297,8 @@ AddPartitionEvent partEvent = (AddPartitionEvent)(notifyList.get(listSize-1)); assert partEvent.getStatus(); Partition part = msc.getPartition("hive2038", "tmptbl", "b=2011"); - validateAddPartition(part, partEvent.getPartitions().get(0)); + Partition partAdded = partEvent.getPartitionIterator().next(); + validateAddPartition(part, partAdded); validateTableInAddPartition(tbl, partEvent.getTable()); validateAddPartition(part, prePartEvent.getPartitions().get(0)); @@ -313,11 +315,12 @@ hmsClient.add_partitions(Arrays.asList(partition1, partition2, partition3)); ++listSize; AddPartitionEvent multiplePartitionEvent = (AddPartitionEvent)(notifyList.get(listSize-1)); - assertEquals("Unexpected number of partitions in event!", 3, multiplePartitionEvent.getPartitions().size()); assertEquals("Unexpected table value.", table, multiplePartitionEvent.getTable()); - assertEquals("Unexpected partition value.", partition1.getValues(), multiplePartitionEvent.getPartitions().get(0).getValues()); - assertEquals("Unexpected partition value.", partition2.getValues(), multiplePartitionEvent.getPartitions().get(1).getValues()); - assertEquals("Unexpected partition value.", partition3.getValues(), multiplePartitionEvent.getPartitions().get(2).getValues()); + List<Partition> multiParts = Lists.newArrayList(multiplePartitionEvent.getPartitionIterator()); + assertEquals("Unexpected number of partitions in event!", 3, multiParts.size()); + assertEquals("Unexpected partition value.", partition1.getValues(), multiParts.get(0).getValues()); + assertEquals("Unexpected partition value.", partition2.getValues(), multiParts.get(1).getValues()); + assertEquals("Unexpected partition value.", partition3.getValues(), multiParts.get(2).getValues()); driver.run(String.format("alter table %s touch partition (%s)", tblName, "b='2011'")); listSize++; @@ -352,7 +355,8 @@ AddPartitionEvent appendPartEvent = (AddPartitionEvent)(notifyList.get(listSize-1)); - validateAddPartition(newPart, appendPartEvent.getPartitions().get(0)); + Partition partAppended = appendPartEvent.getPartitionIterator().next(); + validateAddPartition(newPart, partAppended); PreAddPartitionEvent preAppendPartEvent = (PreAddPartitionEvent)(preNotifyList.get(preNotifyList.size() - 1)); Index: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java (revision 1673556) +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java (working copy) @@ -97,7 +97,7 @@ + " (i int, j int, k string) partitioned by (city string, `date` string) "); runCmd("create database " + dbName); // Need a separate table for ACID testing since it has to be bucketed and it has to be Acid - runCmd("create table " + acidTableName + " (i int, j int) clustered by (i) into 2
buckets " + + runCmd("create table " + acidTableName + " (i int, j int, k int) clustered by (k) into 2 buckets " + "stored as orc TBLPROPERTIES ('transactional'='true')"); } @@ -273,7 +273,7 @@ List inputs = io.getLeft(); assertEquals(1, inputs.size()); tableObj = inputs.get(0); - assertEquals(1, tableObj.getColumns().size()); + assertEquals(2, tableObj.getColumns().size()); assertEquals("j", tableObj.getColumns().get(0)); } Index: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java (revision 1673556) +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java (working copy) @@ -371,7 +371,7 @@ executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver); - executeStatementOnDriver("update " + tblName + " set a = 3", driver); + executeStatementOnDriver("update " + tblName + " set b = 'fred' where a = 1", driver); executeStatementOnDriver("delete from " + tblName + " where b = 'fred'", driver); Index: itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java (revision 1673556) +++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java (working copy) @@ -50,6 +50,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.TableType; @@ -106,7 +108,7 @@ public static void setUpBeforeClass() throws SQLException, ClassNotFoundException{ Class.forName(driverName); Connection con1 = getConnection("default"); - System.setProperty(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_VERBOSE.varname, "" + true); + System.setProperty(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL.varname, "verbose"); Statement stmt1 = con1.createStatement(); assertNotNull("Statement is null", stmt1); @@ -893,6 +895,54 @@ assertFalse(res.next()); } + @Test + public void testIntervalTypes() throws Exception { + Statement stmt = con.createStatement(); + + // Since interval types not currently supported as table columns, need to create them + // as expressions. 
+ ResultSet res = stmt.executeQuery( + "select case when c17 is null then null else interval '1' year end as col1," + + " c17 - c17 as col2 from " + dataTypeTableName + " order by col1"); + ResultSetMetaData meta = res.getMetaData(); + + assertEquals("col1", meta.getColumnLabel(1)); + assertEquals(java.sql.Types.OTHER, meta.getColumnType(1)); + assertEquals("interval_year_month", meta.getColumnTypeName(1)); + assertEquals(11, meta.getColumnDisplaySize(1)); + assertEquals(11, meta.getPrecision(1)); + assertEquals(0, meta.getScale(1)); + assertEquals(HiveIntervalYearMonth.class.getName(), meta.getColumnClassName(1)); + + assertEquals("col2", meta.getColumnLabel(2)); + assertEquals(java.sql.Types.OTHER, meta.getColumnType(2)); + assertEquals("interval_day_time", meta.getColumnTypeName(2)); + assertEquals(29, meta.getColumnDisplaySize(2)); + assertEquals(29, meta.getPrecision(2)); + assertEquals(0, meta.getScale(2)); + assertEquals(HiveIntervalDayTime.class.getName(), meta.getColumnClassName(2)); + + // row 1 - results should be null + assertTrue(res.next()); + // skip the last (partitioning) column since it is always non-null + for (int i = 1; i < meta.getColumnCount(); i++) { + assertNull("Column " + i + " should be null", res.getObject(i)); + } + + // row 2 - results should be null + assertTrue(res.next()); + for (int i = 1; i < meta.getColumnCount(); i++) { + assertNull("Column " + i + " should be null", res.getObject(i)); + } + + // row 3 + assertTrue(res.next()); + assertEquals("1-0", res.getString(1)); + assertEquals(1, ((HiveIntervalYearMonth) res.getObject(1)).getYears()); + assertEquals("0 00:00:00.000000000", res.getString(2)); + assertEquals(0, ((HiveIntervalDayTime) res.getObject(2)).getDays()); + } + private void doTestSelectAll(String tableName, int maxRows, int fetchSize) throws Exception { boolean isPartitionTable = tableName.equals(partitionedTableName); Index: itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java (revision 1673556) +++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java (working copy) @@ -266,7 +266,7 @@ // Set some conf parameters String hiveConf = "hive.cli.print.header=true;hive.server2.async.exec.shutdown.timeout=20;" - + "hive.server2.async.exec.threads=30;hive.server2.thrift.http.max.worker.threads=15"; + + "hive.server2.async.exec.threads=30;hive.server2.thrift.max.worker.threads=15"; // Set some conf vars String hiveVar = "stab=salesTable;icol=customerID"; String jdbcUri = miniHS2.getJdbcURL() + "?" 
+ hiveConf + "#" + hiveVar; @@ -284,7 +284,7 @@ verifyConfProperty(stmt, "hive.cli.print.header", "true"); verifyConfProperty(stmt, "hive.server2.async.exec.shutdown.timeout", "20"); verifyConfProperty(stmt, "hive.server2.async.exec.threads", "30"); - verifyConfProperty(stmt, "hive.server2.thrift.http.max.worker.threads", + verifyConfProperty(stmt, "hive.server2.thrift.max.worker.threads", "15"); verifyConfProperty(stmt, "stab", "salesTable"); verifyConfProperty(stmt, "icol", "customerID"); Index: itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java (revision 1673556) +++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java (working copy) @@ -155,7 +155,7 @@ cause = cause.getCause(); } Assert.assertEquals("org.apache.http.NoHttpResponseException", cause.getClass().getName()); - Assert.assertEquals("The target server failed to respond", cause.getMessage()); + Assert.assertTrue(cause.getMessage().contains("failed to respond")); } miniHS2.stop(); } Index: itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPI.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPI.java (revision 0) +++ itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPI.java (working copy) @@ -0,0 +1,379 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.service.cli.operation; + +import java.io.File; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hive.jdbc.miniHS2.MiniHS2; +import org.apache.hive.service.cli.CLIServiceClient; +import org.apache.hive.service.cli.FetchOrientation; +import org.apache.hive.service.cli.FetchType; +import org.apache.hive.service.cli.HiveSQLException; +import org.apache.hive.service.cli.OperationHandle; +import org.apache.hive.service.cli.OperationState; +import org.apache.hive.service.cli.OperationStatus; +import org.apache.hive.service.cli.RowSet; +import org.apache.hive.service.cli.SessionHandle; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * TestOperationLoggingAPI + * Test the FetchResults of TFetchType.LOG in thrift level. 
+ */ +public class TestOperationLoggingAPI { + private static HiveConf hiveConf; + private final String tableName = "testOperationLoggingAPI_table"; + private File dataFile; + private CLIServiceClient client; + private static MiniHS2 miniHS2 = null; + private static Map<String, String> confOverlay; + private SessionHandle sessionHandle; + private final String sql = "select * from " + tableName; + private final String sqlCntStar = "select count(*) from " + tableName; + private final String[] expectedLogs = { + "Parsing command", + "Parse Completed", + "Starting Semantic Analysis", + "Semantic Analysis Completed", + "Starting command" + }; + private final String[] expectedLogsExecution = { + "Number of reduce tasks determined at compile time", + "number of splits", + "Submitting tokens for job", + "Ended Job" + }; + private final String[] expectedLogsPerformance = { + "", + "", + "", + "" + }; + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + hiveConf = new HiveConf(); + hiveConf.set(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL.varname, "verbose"); + // We need to set the below parameter to test performance level logging + hiveConf.set("hive.ql.log.PerfLogger.level", "INFO,DRFA"); + miniHS2 = new MiniHS2(hiveConf); + confOverlay = new HashMap<String, String>(); + confOverlay.put(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + miniHS2.start(confOverlay); + } + + /** + * Open a session, and create a table for cases usage + * @throws Exception + */ + @Before + public void setUp() throws Exception { + dataFile = new File(hiveConf.get("test.data.files"), "kv1.txt"); + client = miniHS2.getServiceClient(); + sessionHandle = setupSession(); + } + + @After + public void tearDown() throws Exception { + // Cleanup + String queryString = "DROP TABLE " + tableName; + client.executeStatement(sessionHandle, queryString, null); + + client.closeSession(sessionHandle); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + miniHS2.stop(); + } + + @Test + public void testFetchResultsOfLog() throws Exception { + // verify whether the sql operation log is generated and fetch correctly. + OperationHandle operationHandle = client.executeStatement(sessionHandle, sql, null); + RowSet rowSetLog = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, + FetchType.LOG); + verifyFetchedLog(rowSetLog, expectedLogs); + } + + @Test + public void testFetchResultsOfLogAsync() throws Exception { + // verify whether the sql operation log is generated and fetch correctly in async mode.
+ OperationHandle operationHandle = client.executeStatementAsync(sessionHandle, sql, null); + + // Poll on the operation status till the query is completed + boolean isQueryRunning = true; + long pollTimeout = System.currentTimeMillis() + 100000; + OperationStatus opStatus; + OperationState state = null; + RowSet rowSetAccumulated = null; + StringBuilder logs = new StringBuilder(); + + while (isQueryRunning) { + // Break if polling times out + if (System.currentTimeMillis() > pollTimeout) { + break; + } + opStatus = client.getOperationStatus(operationHandle); + Assert.assertNotNull(opStatus); + state = opStatus.getState(); + + rowSetAccumulated = client.fetchResults(operationHandle, FetchOrientation.FETCH_NEXT, 2000, + FetchType.LOG); + for (Object[] row : rowSetAccumulated) { + logs.append(row[0]); + } + + if (state == OperationState.CANCELED || + state == OperationState.CLOSED || + state == OperationState.FINISHED || + state == OperationState.ERROR) { + isQueryRunning = false; + } + Thread.sleep(10); + } + // The sql should be completed now. + Assert.assertEquals("Query should be finished", OperationState.FINISHED, state); + + // Verify the accumulated logs + verifyFetchedLogPost(logs.toString(), expectedLogs, true); + + // Verify the fetched logs from the beginning of the log file + RowSet rowSet = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 2000, + FetchType.LOG); + verifyFetchedLog(rowSet, expectedLogs); + } + + @Test + public void testFetchResultsOfLogWithOrientation() throws Exception { + // (FETCH_FIRST) execute a sql, and fetch its sql operation log as expected value + OperationHandle operationHandle = client.executeStatement(sessionHandle, sql, null); + RowSet rowSetLog = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, + FetchType.LOG); + int expectedLogLength = rowSetLog.numRows(); + + // (FETCH_NEXT) execute the same sql again, + // and fetch the sql operation log with FETCH_NEXT orientation + OperationHandle operationHandleWithOrientation = client.executeStatement(sessionHandle, sql, + null); + RowSet rowSetLogWithOrientation; + int logLength = 0; + int maxRows = calculateProperMaxRows(expectedLogLength); + do { + rowSetLogWithOrientation = client.fetchResults(operationHandleWithOrientation, + FetchOrientation.FETCH_NEXT, maxRows, FetchType.LOG); + logLength += rowSetLogWithOrientation.numRows(); + } while (rowSetLogWithOrientation.numRows() == maxRows); + Assert.assertEquals(expectedLogLength, logLength); + + // (FETCH_FIRST) fetch again from the same operation handle with FETCH_FIRST orientation + rowSetLogWithOrientation = client.fetchResults(operationHandleWithOrientation, + FetchOrientation.FETCH_FIRST, 1000, FetchType.LOG); + verifyFetchedLog(rowSetLogWithOrientation, expectedLogs); + } + + @Test + public void testFetchResultsOfLogWithVerboseMode() throws Exception { + String queryString = "set hive.server2.logging.operation.level=verbose"; + client.executeStatement(sessionHandle, queryString, null); + // verify whether the sql operation log is generated and fetch correctly. 
+ OperationHandle operationHandle = client.executeStatement(sessionHandle, sqlCntStar, null); + RowSet rowSetLog = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, + FetchType.LOG); + // Verbose Logs should contain everything, including execution and performance + verifyFetchedLog(rowSetLog, expectedLogs); + verifyFetchedLog(rowSetLog, expectedLogsExecution); + verifyFetchedLog(rowSetLog, expectedLogsPerformance); + } + + @Test + public void testFetchResultsOfLogWithPerformanceMode() throws Exception { + try { + String queryString = "set hive.server2.logging.operation.level=performance"; + client.executeStatement(sessionHandle, queryString, null); + // verify whether the sql operation log is generated and fetch correctly. + OperationHandle operationHandle = client.executeStatement(sessionHandle, sqlCntStar, null); + RowSet rowSetLog = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, + FetchType.LOG); + // rowSetLog should contain execution as well as performance logs + verifyFetchedLog(rowSetLog, expectedLogsExecution); + verifyFetchedLog(rowSetLog, expectedLogsPerformance); + verifyMissingContentsInFetchedLog(rowSetLog, expectedLogs); + } finally { + // Restore everything to default setup to avoid discrepancy between junit test runs + String queryString2 = "set hive.server2.logging.operation.level=verbose"; + client.executeStatement(sessionHandle, queryString2, null); + } + } + + @Test + public void testFetchResultsOfLogWithExecutionMode() throws Exception { + try { + String queryString = "set hive.server2.logging.operation.level=execution"; + client.executeStatement(sessionHandle, queryString, null); + // verify whether the sql operation log is generated and fetch correctly. + OperationHandle operationHandle = client.executeStatement(sessionHandle, sqlCntStar, null); + RowSet rowSetLog = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, + FetchType.LOG); + verifyFetchedLog(rowSetLog, expectedLogsExecution); + verifyMissingContentsInFetchedLog(rowSetLog, expectedLogsPerformance); + verifyMissingContentsInFetchedLog(rowSetLog, expectedLogs); + } finally { + // Restore everything to default setup to avoid discrepancy between junit test runs + String queryString2 = "set hive.server2.logging.operation.level=verbose"; + client.executeStatement(sessionHandle, queryString2, null); + } + } + + @Test + public void testFetchResultsOfLogWithNoneMode() throws Exception { + try { + String queryString = "set hive.server2.logging.operation.level=none"; + client.executeStatement(sessionHandle, queryString, null); + // verify whether the sql operation log is generated and fetch correctly. + OperationHandle operationHandle = client.executeStatement(sessionHandle, sqlCntStar, null); + RowSet rowSetLog = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, + FetchType.LOG); + // We should not get any rows. + assert(rowSetLog.numRows() == 0); + } finally { + // Restore everything to default setup to avoid discrepancy between junit test runs + String queryString2 = "set hive.server2.logging.operation.level=verbose"; + client.executeStatement(sessionHandle, queryString2, null); + } + } + + @Test + public void testFetchResultsOfLogCleanup() throws Exception { + // Verify cleanup functionality. + // Open a new session, since this case needs to close the session in the end. 
+ SessionHandle sessionHandleCleanup = setupSession(); + + // prepare + OperationHandle operationHandle = client.executeStatement(sessionHandleCleanup, sql, null); + RowSet rowSetLog = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, + FetchType.LOG); + verifyFetchedLog(rowSetLog, expectedLogs); + + File sessionLogDir = new File( + hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION) + + File.separator + sessionHandleCleanup.getHandleIdentifier()); + File operationLogFile = new File(sessionLogDir, operationHandle.getHandleIdentifier().toString()); + + // check whether exception is thrown when fetching log from a closed operation. + client.closeOperation(operationHandle); + try { + client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, FetchType.LOG); + Assert.fail("Fetch should fail"); + } catch (HiveSQLException e) { + Assert.assertTrue(e.getMessage().contains("Invalid OperationHandle:")); + } + + // check whether operation log file is deleted. + if (operationLogFile.exists()) { + Assert.fail("Operation log file should be deleted."); + } + + // check whether session log dir is deleted after session is closed. + client.closeSession(sessionHandleCleanup); + if (sessionLogDir.exists()) { + Assert.fail("Session log dir should be deleted."); + } + } + + private SessionHandle setupSession() throws Exception { + // Open a session + SessionHandle sessionHandle = client.openSession(null, null, null); + + // Change lock manager to embedded mode + String queryString = "SET hive.lock.manager=" + + "org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager"; + client.executeStatement(sessionHandle, queryString, null); + + // Drop the table if it exists + queryString = "DROP TABLE IF EXISTS " + tableName; + client.executeStatement(sessionHandle, queryString, null); + + // Create a test table + queryString = "create table " + tableName + " (key int, value string)"; + client.executeStatement(sessionHandle, queryString, null); + + // Load data + queryString = "load data local inpath '" + dataFile + "' into table " + tableName; + client.executeStatement(sessionHandle, queryString, null); + + // Precondition check: verify whether the table is created and data is fetched correctly. + OperationHandle operationHandle = client.executeStatement(sessionHandle, sql, null); + RowSet rowSetResult = client.fetchResults(operationHandle); + Assert.assertEquals(500, rowSetResult.numRows()); + Assert.assertEquals(238, rowSetResult.iterator().next()[0]); + Assert.assertEquals("val_238", rowSetResult.iterator().next()[1]); + + return sessionHandle; + } + + // Since the log length of the sql operation may vary during HIVE dev, calculate a proper maxRows. 
+ private int calculateProperMaxRows(int len) { + if (len < 10) { + return 1; + } else if (len < 100) { + return 10; + } else { + return 100; + } + } + + private String verifyFetchedLogPre(RowSet rowSet, String[] el) { + StringBuilder stringBuilder = new StringBuilder(); + + for (Object[] row : rowSet) { + stringBuilder.append(row[0]); + } + + return stringBuilder.toString(); + } + + private void verifyFetchedLog(RowSet rowSet, String[] el) { + String logs = verifyFetchedLogPre(rowSet, el); + verifyFetchedLogPost(logs, el, true); + } + + private void verifyMissingContentsInFetchedLog(RowSet rowSet, String[] el) { + String logs = verifyFetchedLogPre(rowSet, el); + verifyFetchedLogPost(logs, el, false); + } + + private void verifyFetchedLogPost(String logs, String[] el, boolean contains) { + for (String log : el) { + if (contains) { + Assert.assertTrue("Checking for presence of " + log, logs.contains(log)); + } else { + Assert.assertFalse("Checking for absence of " + log, logs.contains(log)); + } + } + } +} Index: itests/hive-unit/src/test/java/org/apache/hive/service/cli/thrift/TestThriftHttpCLIService.java =================================================================== --- itests/hive-unit/src/test/java/org/apache/hive/service/cli/thrift/TestThriftHttpCLIService.java (revision 1673556) +++ itests/hive-unit/src/test/java/org/apache/hive/service/cli/thrift/TestThriftHttpCLIService.java (working copy) @@ -160,7 +160,7 @@ String httpUrl = transportMode + "://" + host + ":" + port + "/" + thriftHttpPath + "/"; httpClient.addRequestInterceptor( - new HttpBasicAuthInterceptor(USERNAME, PASSWORD)); + new HttpBasicAuthInterceptor(USERNAME, PASSWORD, null, null)); return new THttpClient(httpUrl, httpClient); } Index: itests/pom.xml =================================================================== --- itests/pom.xml (revision 1673556) +++ itests/pom.xml (working copy) @@ -93,6 +93,9 @@ mkdir -p $DOWNLOAD_DIR download "http://d3jw87u4immizc.cloudfront.net/spark-tarball/spark-${spark.version}-bin-hadoop2-without-hive.tgz" "spark" cp -f $HIVE_ROOT/data/conf/spark/log4j.properties $BASE_DIR/spark/conf/ + sed '/package /d' ${basedir}/${hive.path.to.root}/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleAdd.java > /tmp/UDFExampleAdd.java + javac -cp ${settings.localRepository}/org/apache/hive/hive-exec/${project.version}/hive-exec-${project.version}.jar /tmp/UDFExampleAdd.java -d /tmp + jar -cf /tmp/udfexampleadd-1.0.jar -C /tmp UDFExampleAdd.class Index: itests/src/test/resources/testconfiguration.properties =================================================================== --- itests/src/test/resources/testconfiguration.properties (revision 1673556) +++ itests/src/test/resources/testconfiguration.properties (working copy) @@ -29,6 +29,7 @@ list_bucket_dml_10.q,\ load_fs2.q,\ load_hdfs_file_with_space_in_the_name.q,\ + non_native_window_udf.q, \ optrstat_groupby.q,\ parallel_orderby.q,\ ql_rewrite_gbtoidx.q,\ @@ -180,9 +181,11 @@ update_where_non_partitioned.q,\ update_where_partitioned.q,\ update_two_cols.q,\ + vector_aggregate_9.q,\ vector_between_in.q,\ vector_bucket.q,\ vector_cast_constant.q,\ + vector_char_2.q,\ vector_char_4.q,\ vector_char_mapjoin1.q,\ vector_char_simple.q,\ @@ -190,6 +193,7 @@ vector_coalesce_2.q,\ vector_count_distinct.q,\ vector_data_types.q,\ + vector_date_1.q,\ vector_decimal_1.q,\ vector_decimal_10_0.q,\ vector_decimal_2.q,\ @@ -203,6 +207,8 @@ vector_decimal_mapjoin.q,\ vector_decimal_math_funcs.q,\ vector_decimal_precision.q,\ + 
vector_decimal_round.q,\ + vector_decimal_round_2.q,\ vector_decimal_trailing.q,\ vector_decimal_udf.q,\ vector_decimal_udf2.q,\ @@ -210,8 +216,12 @@ vector_elt.q,\ vector_groupby_3.q,\ vector_groupby_reduce.q,\ + vector_if_expr.q,\ + vector_interval_1.q,\ + vector_interval_2.q,\ vector_left_outer_join.q,\ vector_mapjoin_reduce.q,\ + vector_multi_insert.q,\ vector_non_string_partition.q,\ vector_orderby_5.q,\ vector_partition_diff_num_cols.q,\ @@ -287,6 +297,8 @@ bucket_map_join_tez2.q,\ dynamic_partition_pruning.q,\ dynamic_partition_pruning_2.q,\ + explainuser_1.q,\ + explainuser_2.q,\ hybridhashjoin.q,\ mapjoin_decimal.q,\ lvj_mapjoin.q, \ Index: jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java =================================================================== --- jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java (revision 1673556) +++ jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java (working copy) @@ -44,6 +44,8 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hive.service.cli.TableSchema; import org.apache.hive.service.cli.Type; @@ -443,6 +445,10 @@ return new BigDecimal((String)value); case DATE_TYPE: return Date.valueOf((String) value); + case INTERVAL_YEAR_MONTH_TYPE: + return HiveIntervalYearMonth.valueOf((String) value); + case INTERVAL_DAY_TIME_TYPE: + return HiveIntervalDayTime.valueOf((String) value); case ARRAY_TYPE: case MAP_TYPE: case STRUCT_TYPE: Index: jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java =================================================================== --- jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java (revision 1673556) +++ jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java (working copy) @@ -50,10 +50,11 @@ import javax.security.sasl.Sasl; import javax.security.sasl.SaslException; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.protocol.HttpContext; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hive.jdbc.Utils.JdbcConnectionParams; import org.apache.hive.service.auth.HiveAuthFactory; import org.apache.hive.service.auth.KerberosSaslHelper; @@ -73,9 +74,17 @@ import org.apache.hive.service.cli.thrift.TRenewDelegationTokenResp; import org.apache.hive.service.cli.thrift.TSessionHandle; import org.apache.http.HttpRequestInterceptor; -import org.apache.http.conn.scheme.Scheme; +import org.apache.http.HttpResponse; +import org.apache.http.client.CookieStore; +import org.apache.http.client.ServiceUnavailableRetryStrategy; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; import org.apache.http.conn.ssl.SSLSocketFactory; -import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.impl.client.BasicCookieStore; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.BasicHttpClientConnectionManager; import org.apache.thrift.TException; import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.transport.THttpClient; @@ -173,6 +182,7 @@ supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V5); supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6); 
supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V7); + supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8); // open client session openSession(); @@ -235,7 +245,7 @@ } private TTransport createHttpTransport() throws SQLException, TTransportException { - DefaultHttpClient httpClient; + CloseableHttpClient httpClient; boolean useSsl = isSslConnection(); // Create an http client from the configs httpClient = getHttpClient(useSsl); @@ -259,35 +269,76 @@ return transport; } - private DefaultHttpClient getHttpClient(Boolean useSsl) throws SQLException { - DefaultHttpClient httpClient = new DefaultHttpClient(); + private CloseableHttpClient getHttpClient(Boolean useSsl) throws SQLException { + boolean isCookieEnabled = sessConfMap.get(JdbcConnectionParams.COOKIE_AUTH) == null || + (!JdbcConnectionParams.COOKIE_AUTH_FALSE.equalsIgnoreCase( + sessConfMap.get(JdbcConnectionParams.COOKIE_AUTH))); + String cookieName = sessConfMap.get(JdbcConnectionParams.COOKIE_NAME) == null ? + JdbcConnectionParams.DEFAULT_COOKIE_NAMES_HS2 : + sessConfMap.get(JdbcConnectionParams.COOKIE_NAME); + CookieStore cookieStore = isCookieEnabled ? new BasicCookieStore() : null; + HttpClientBuilder httpClientBuilder; // Request interceptor for any request pre-processing logic HttpRequestInterceptor requestInterceptor; - // If Kerberos + + // Configure http client for kerberos/password based authentication if (isKerberosAuthMode()) { /** * Add an interceptor which sets the appropriate header in the request. * It does the kerberos authentication and get the final service ticket, * for sending to the server before every request. * In https mode, the entire information is encrypted - * TODO: Optimize this with a mix of kerberos + using cookie. */ requestInterceptor = new HttpKerberosRequestInterceptor(sessConfMap.get(JdbcConnectionParams.AUTH_PRINCIPAL), - host, getServerHttpUrl(useSsl), assumeSubject); + host, getServerHttpUrl(useSsl), assumeSubject, cookieStore, cookieName); } else { /** * Add an interceptor to pass username/password in the header. * In https mode, the entire information is encrypted */ - requestInterceptor = new HttpBasicAuthInterceptor(getUserName(), getPassword()); + requestInterceptor = new HttpBasicAuthInterceptor(getUserName(), getPassword(), + cookieStore, cookieName); } - // Configure httpClient for SSL + // Configure http client for cookie based authentication + if (isCookieEnabled) { + // Create a http client with a retry mechanism when the server returns a status code of 401. 
+ httpClientBuilder = + HttpClients.custom().setServiceUnavailableRetryStrategy( + new ServiceUnavailableRetryStrategy() { + + @Override + public boolean retryRequest( + final HttpResponse response, + final int executionCount, + final HttpContext context) { + int statusCode = response.getStatusLine().getStatusCode(); + boolean ret = statusCode == 401 && executionCount <= 1; + + // Set the context attribute to true which will be interpreted by the request interceptor + if (ret) { + context.setAttribute(Utils.HIVE_SERVER2_RETRY_KEY, Utils.HIVE_SERVER2_RETRY_TRUE); + } + return ret; + } + + @Override + public long getRetryInterval() { + // Immediate retry + return 0; + } + }); + } else { + httpClientBuilder = HttpClientBuilder.create(); + } + // Add the request interceptor to the client builder + httpClientBuilder.addInterceptorFirst(requestInterceptor); + // Configure http client for SSL if (useSsl) { String sslTrustStorePath = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE); String sslTrustStorePassword = sessConfMap.get( - JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); + JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); KeyStore sslTrustStore; SSLSocketFactory socketFactory; /** @@ -311,21 +362,25 @@ // Pick trust store config from the given path sslTrustStore = KeyStore.getInstance(JdbcConnectionParams.SSL_TRUST_STORE_TYPE); sslTrustStore.load(new FileInputStream(sslTrustStorePath), - sslTrustStorePassword.toCharArray()); + sslTrustStorePassword.toCharArray()); socketFactory = new SSLSocketFactory(sslTrustStore); } socketFactory.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER); - Scheme sslScheme = new Scheme("https", 443, socketFactory); - httpClient.getConnectionManager().getSchemeRegistry().register(sslScheme); + + final Registry<ConnectionSocketFactory> registry = + RegistryBuilder.<ConnectionSocketFactory>create() + .register("https", socketFactory) + .build(); + + httpClientBuilder.setConnectionManager(new BasicHttpClientConnectionManager(registry)); } catch (Exception e) { String msg = "Could not create an https connection to " + - jdbcUriString + ". " + e.getMessage(); + jdbcUriString + ". " + e.getMessage(); throw new SQLException(msg, " 08S01", e); } } - httpClient.addRequestInterceptor(requestInterceptor); - return httpClient; + return httpClientBuilder.build(); } /** Index: jdbc/src/java/org/apache/hive/jdbc/HiveResultSetMetaData.java =================================================================== --- jdbc/src/java/org/apache/hive/jdbc/HiveResultSetMetaData.java (revision 1673556) +++ jdbc/src/java/org/apache/hive/jdbc/HiveResultSetMetaData.java (working copy) @@ -21,6 +21,7 @@ import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.List; +import org.apache.hive.service.cli.Type; /** * HiveResultSetMetaData.
@@ -43,9 +44,13 @@ throw new SQLException("Method not supported"); } + private Type getHiveType(int column) throws SQLException { + return JdbcColumn.typeStringToHiveType(columnTypes.get(toZeroIndex(column))); + } + public String getColumnClassName(int column) throws SQLException { - int columnType = getColumnType(column); - return JdbcColumn.columnClassName(columnType, columnAttributes.get(toZeroIndex(column))); + return JdbcColumn.columnClassName(getHiveType(column), + columnAttributes.get(toZeroIndex(column))); } public int getColumnCount() throws SQLException { @@ -53,9 +58,8 @@ } public int getColumnDisplaySize(int column) throws SQLException { - int columnType = getColumnType(column); - - return JdbcColumn.columnDisplaySize(columnType, columnAttributes.get(toZeroIndex(column))); + return JdbcColumn.columnDisplaySize(getHiveType(column), + columnAttributes.get(toZeroIndex(column))); } public String getColumnLabel(int column) throws SQLException { @@ -79,15 +83,13 @@ } public int getPrecision(int column) throws SQLException { - int columnType = getColumnType(column); - - return JdbcColumn.columnPrecision(columnType, columnAttributes.get(toZeroIndex(column))); + return JdbcColumn.columnPrecision(getHiveType(column), + columnAttributes.get(toZeroIndex(column))); } public int getScale(int column) throws SQLException { - int columnType = getColumnType(column); - - return JdbcColumn.columnScale(columnType, columnAttributes.get(toZeroIndex(column))); + return JdbcColumn.columnScale(getHiveType(column), + columnAttributes.get(toZeroIndex(column))); } public String getSchemaName(int column) throws SQLException { Index: jdbc/src/java/org/apache/hive/jdbc/HttpBasicAuthInterceptor.java =================================================================== --- jdbc/src/java/org/apache/hive/jdbc/HttpBasicAuthInterceptor.java (revision 1673556) +++ jdbc/src/java/org/apache/hive/jdbc/HttpBasicAuthInterceptor.java (working copy) @@ -25,6 +25,8 @@ import org.apache.http.HttpRequest; import org.apache.http.HttpRequestInterceptor; import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CookieStore; +import org.apache.http.client.protocol.ClientContext; import org.apache.http.impl.auth.AuthSchemeBase; import org.apache.http.impl.auth.BasicScheme; import org.apache.http.protocol.HttpContext; @@ -37,20 +39,42 @@ public class HttpBasicAuthInterceptor implements HttpRequestInterceptor { UsernamePasswordCredentials credentials; AuthSchemeBase authScheme; + CookieStore cookieStore; + boolean isCookieEnabled; + String cookieName; - public HttpBasicAuthInterceptor(String username, String password) { + public HttpBasicAuthInterceptor(String username, String password, CookieStore cookieStore, + String cn) { if(username != null){ credentials = new UsernamePasswordCredentials(username, password); } authScheme = new BasicScheme(); + this.cookieStore = cookieStore; + isCookieEnabled = (cookieStore != null); + cookieName = cn; } @Override public void process(HttpRequest httpRequest, HttpContext httpContext) throws HttpException, IOException { - Header basicAuthHeader = authScheme.authenticate( - credentials, httpRequest, httpContext); - httpRequest.addHeader(basicAuthHeader); + if (isCookieEnabled) { + httpContext.setAttribute(ClientContext.COOKIE_STORE, cookieStore); + } + // Add the authentication details under the following scenarios: + // 1. Cookie Authentication is disabled OR + // 2. The first time when the request is sent OR + // 3. 
The server returns a 401, which sometimes means the cookie has expired + if (!isCookieEnabled || ((httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY) == null && + (cookieStore == null || (cookieStore != null && + Utils.needToSendCredentials(cookieStore, cookieName)))) || + (httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY) != null && + httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY). + equals(Utils.HIVE_SERVER2_RETRY_TRUE)))) { + Header basicAuthHeader = authScheme.authenticate(credentials, httpRequest, httpContext); + httpRequest.addHeader(basicAuthHeader); + } + if (isCookieEnabled) { + httpContext.setAttribute(Utils.HIVE_SERVER2_RETRY_KEY, Utils.HIVE_SERVER2_RETRY_FALSE); + } } - } Index: jdbc/src/java/org/apache/hive/jdbc/HttpKerberosRequestInterceptor.java =================================================================== --- jdbc/src/java/org/apache/hive/jdbc/HttpKerberosRequestInterceptor.java (revision 1673556) +++ jdbc/src/java/org/apache/hive/jdbc/HttpKerberosRequestInterceptor.java (working copy) @@ -25,6 +25,8 @@ import org.apache.http.HttpException; import org.apache.http.HttpRequest; import org.apache.http.HttpRequestInterceptor; +import org.apache.http.client.CookieStore; +import org.apache.http.client.protocol.ClientContext; import org.apache.http.protocol.HttpContext; /** @@ -40,31 +42,59 @@ String host; String serverHttpUrl; boolean assumeSubject; + CookieStore cookieStore; + boolean isCookieEnabled; + String cookieName; // A fair reentrant lock private static ReentrantLock kerberosLock = new ReentrantLock(true); public HttpKerberosRequestInterceptor(String principal, String host, - String serverHttpUrl, boolean assumeSubject) { + String serverHttpUrl, boolean assumeSubject, CookieStore cs, String cn) { this.principal = principal; this.host = host; this.serverHttpUrl = serverHttpUrl; this.assumeSubject = assumeSubject; + this.cookieStore = cs; + isCookieEnabled = (cs != null); + cookieName = cn; } @Override public void process(HttpRequest httpRequest, HttpContext httpContext) throws HttpException, IOException { String kerberosAuthHeader; + try { // Generate the service ticket for sending to the server. // Locking ensures the tokens are unique in case of concurrent requests kerberosLock.lock(); - kerberosAuthHeader = HttpAuthUtils.getKerberosServiceTicket( - principal, host, serverHttpUrl, assumeSubject); - // Set the session key token (Base64 encoded) in the headers - httpRequest.addHeader(HttpAuthUtils.AUTHORIZATION + ": " + - HttpAuthUtils.NEGOTIATE + " ", kerberosAuthHeader); + // If cookie based authentication is allowed, generate ticket only when necessary. + // The necessary condition is either when there are no server side cookies in the + // cookiestore which can be send back or when the server returns a 401 error code + // indicating that the previous cookie has expired. + if (isCookieEnabled) { + httpContext.setAttribute(ClientContext.COOKIE_STORE, cookieStore); + } + // Generate the kerberos ticket under the following scenarios: + // 1. Cookie Authentication is disabled OR + // 2. The first time when the request is sent OR + // 3. The server returns a 401, which sometimes means the cookie has expired + if (!isCookieEnabled || ((httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY) == null && + (cookieStore == null || (cookieStore != null && + Utils.needToSendCredentials(cookieStore, cookieName)))) || + (httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY) != null && + httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY). 
+ equals(Utils.HIVE_SERVER2_RETRY_TRUE)))) { + kerberosAuthHeader = HttpAuthUtils.getKerberosServiceTicket( + principal, host, serverHttpUrl, assumeSubject); + // Set the session key token (Base64 encoded) in the headers + httpRequest.addHeader(HttpAuthUtils.AUTHORIZATION + ": " + + HttpAuthUtils.NEGOTIATE + " ", kerberosAuthHeader); + } + if (isCookieEnabled) { + httpContext.setAttribute(Utils.HIVE_SERVER2_RETRY_KEY, Utils.HIVE_SERVER2_RETRY_FALSE); + } } catch (Exception e) { throw new HttpException(e.getMessage(), e); } Index: jdbc/src/java/org/apache/hive/jdbc/JdbcColumn.java =================================================================== --- jdbc/src/java/org/apache/hive/jdbc/JdbcColumn.java (revision 1673556) +++ jdbc/src/java/org/apache/hive/jdbc/JdbcColumn.java (working copy) @@ -18,7 +18,10 @@ package org.apache.hive.jdbc; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hive.service.cli.Type; import java.math.BigInteger; import java.sql.Date; @@ -64,10 +67,12 @@ return type; } - static String columnClassName(int columnType, JdbcColumnAttributes columnAttributes) + static String columnClassName(Type hiveType, JdbcColumnAttributes columnAttributes) throws SQLException { - // according to hiveTypeToSqlType possible options are: + int columnType = hiveTypeToSqlType(hiveType); switch(columnType) { + case Types.NULL: + return "null"; case Types.BOOLEAN: return Boolean.class.getName(); case Types.CHAR: @@ -93,7 +98,17 @@ return BigInteger.class.getName(); case Types.BINARY: return byte[].class.getName(); - case Types.JAVA_OBJECT: + case Types.OTHER: + case Types.JAVA_OBJECT: { + switch (hiveType) { + case INTERVAL_YEAR_MONTH_TYPE: + return HiveIntervalYearMonth.class.getName(); + case INTERVAL_DAY_TIME_TYPE: + return HiveIntervalDayTime.class.getName(); + default: + return String.class.getName(); + } + } case Types.ARRAY: case Types.STRUCT: return String.class.getName(); @@ -102,45 +117,61 @@ } } - public static int hiveTypeToSqlType(String type) throws SQLException { + static Type typeStringToHiveType(String type) throws SQLException { if ("string".equalsIgnoreCase(type)) { - return Types.VARCHAR; + return Type.STRING_TYPE; } else if ("varchar".equalsIgnoreCase(type)) { - return Types.VARCHAR; + return Type.VARCHAR_TYPE; } else if ("char".equalsIgnoreCase(type)) { - return Types.CHAR; + return Type.CHAR_TYPE; } else if ("float".equalsIgnoreCase(type)) { - return Types.FLOAT; + return Type.FLOAT_TYPE; } else if ("double".equalsIgnoreCase(type)) { - return Types.DOUBLE; + return Type.DOUBLE_TYPE; } else if ("boolean".equalsIgnoreCase(type)) { - return Types.BOOLEAN; + return Type.BOOLEAN_TYPE; } else if ("tinyint".equalsIgnoreCase(type)) { - return Types.TINYINT; + return Type.TINYINT_TYPE; } else if ("smallint".equalsIgnoreCase(type)) { - return Types.SMALLINT; + return Type.SMALLINT_TYPE; } else if ("int".equalsIgnoreCase(type)) { - return Types.INTEGER; + return Type.INT_TYPE; } else if ("bigint".equalsIgnoreCase(type)) { - return Types.BIGINT; + return Type.BIGINT_TYPE; } else if ("date".equalsIgnoreCase(type)) { - return Types.DATE; + return Type.DATE_TYPE; } else if ("timestamp".equalsIgnoreCase(type)) { - return Types.TIMESTAMP; + return Type.TIMESTAMP_TYPE; + } else if ("interval_year_month".equalsIgnoreCase(type)) { + return Type.INTERVAL_YEAR_MONTH_TYPE; + } else if ("interval_day_time".equalsIgnoreCase(type)) { + return 
Type.INTERVAL_DAY_TIME_TYPE; } else if ("decimal".equalsIgnoreCase(type)) { - return Types.DECIMAL; + return Type.DECIMAL_TYPE; } else if ("binary".equalsIgnoreCase(type)) { - return Types.BINARY; + return Type.BINARY_TYPE; } else if ("map".equalsIgnoreCase(type)) { - return Types.JAVA_OBJECT; + return Type.MAP_TYPE; } else if ("array".equalsIgnoreCase(type)) { - return Types.ARRAY; + return Type.ARRAY_TYPE; } else if ("struct".equalsIgnoreCase(type)) { - return Types.STRUCT; + return Type.STRUCT_TYPE; } throw new SQLException("Unrecognized column type: " + type); } + public static int hiveTypeToSqlType(Type hiveType) throws SQLException { + return hiveType.toJavaSQLType(); + } + + public static int hiveTypeToSqlType(String type) throws SQLException { + if ("void".equalsIgnoreCase(type) || "null".equalsIgnoreCase(type)) { + return Types.NULL; + } else { + return hiveTypeToSqlType(typeStringToHiveType(type)); + } + } + static String getColumnTypeName(String type) throws SQLException { // we need to convert the Hive type to the SQL type name // TODO: this would be better handled in an enum @@ -168,11 +199,15 @@ return serdeConstants.TIMESTAMP_TYPE_NAME; } else if ("date".equalsIgnoreCase(type)) { return serdeConstants.DATE_TYPE_NAME; + } else if ("interval_year_month".equalsIgnoreCase(type)) { + return serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME; + } else if ("interval_day_time".equalsIgnoreCase(type)) { + return serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME; } else if ("decimal".equalsIgnoreCase(type)) { return serdeConstants.DECIMAL_TYPE_NAME; } else if ("binary".equalsIgnoreCase(type)) { return serdeConstants.BINARY_TYPE_NAME; - } else if ("void".equalsIgnoreCase(type)) { + } else if ("void".equalsIgnoreCase(type) || "null".equalsIgnoreCase(type)) { return serdeConstants.VOID_TYPE_NAME; } else if (type.equalsIgnoreCase("map")) { return serdeConstants.MAP_TYPE_NAME; @@ -185,26 +220,27 @@ throw new SQLException("Unrecognized column type: " + type); } - static int columnDisplaySize(int columnType, JdbcColumnAttributes columnAttributes) + static int columnDisplaySize(Type hiveType, JdbcColumnAttributes columnAttributes) throws SQLException { // according to hiveTypeToSqlType possible options are: + int columnType = hiveTypeToSqlType(hiveType); switch(columnType) { case Types.BOOLEAN: - return columnPrecision(columnType, columnAttributes); + return columnPrecision(hiveType, columnAttributes); case Types.CHAR: case Types.VARCHAR: - return columnPrecision(columnType, columnAttributes); + return columnPrecision(hiveType, columnAttributes); case Types.BINARY: return Integer.MAX_VALUE; // hive has no max limit for binary case Types.TINYINT: case Types.SMALLINT: case Types.INTEGER: case Types.BIGINT: - return columnPrecision(columnType, columnAttributes) + 1; // allow +/- + return columnPrecision(hiveType, columnAttributes) + 1; // allow +/- case Types.DATE: return 10; case Types.TIMESTAMP: - return columnPrecision(columnType, columnAttributes); + return columnPrecision(hiveType, columnAttributes); // see http://download.oracle.com/javase/6/docs/api/constant-values.html#java.lang.Float.MAX_EXPONENT case Types.FLOAT: @@ -213,8 +249,10 @@ case Types.DOUBLE: return 25; // e.g. -(17#).e-#### case Types.DECIMAL: - return columnPrecision(columnType, columnAttributes) + 2; // '-' sign and '.' + return columnPrecision(hiveType, columnAttributes) + 2; // '-' sign and '.' 
+ case Types.OTHER: case Types.JAVA_OBJECT: + return columnPrecision(hiveType, columnAttributes); case Types.ARRAY: case Types.STRUCT: return Integer.MAX_VALUE; @@ -223,8 +261,9 @@ } } - static int columnPrecision(int columnType, JdbcColumnAttributes columnAttributes) + static int columnPrecision(Type hiveType, JdbcColumnAttributes columnAttributes) throws SQLException { + int columnType = hiveTypeToSqlType(hiveType); // according to hiveTypeToSqlType possible options are: switch(columnType) { case Types.BOOLEAN: @@ -255,7 +294,19 @@ return 29; case Types.DECIMAL: return columnAttributes.precision; - case Types.JAVA_OBJECT: + case Types.OTHER: + case Types.JAVA_OBJECT: { + switch (hiveType) { + case INTERVAL_YEAR_MONTH_TYPE: + // -yyyyyyy-mm : should be more than enough + return 11; + case INTERVAL_DAY_TIME_TYPE: + // -ddddddddd hh:mm:ss.nnnnnnnnn + return 29; + default: + return Integer.MAX_VALUE; + } + } case Types.ARRAY: case Types.STRUCT: return Integer.MAX_VALUE; @@ -264,8 +315,9 @@ } } - static int columnScale(int columnType, JdbcColumnAttributes columnAttributes) + static int columnScale(Type hiveType, JdbcColumnAttributes columnAttributes) throws SQLException { + int columnType = hiveTypeToSqlType(hiveType); // according to hiveTypeToSqlType possible options are: switch(columnType) { case Types.BOOLEAN: @@ -286,6 +338,7 @@ return 9; case Types.DECIMAL: return columnAttributes.scale; + case Types.OTHER: case Types.JAVA_OBJECT: case Types.ARRAY: case Types.STRUCT: Index: jdbc/src/java/org/apache/hive/jdbc/Utils.java =================================================================== --- jdbc/src/java/org/apache/hive/jdbc/Utils.java (revision 1673556) +++ jdbc/src/java/org/apache/hive/jdbc/Utils.java (working copy) @@ -34,6 +34,8 @@ import org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.cli.thrift.TStatus; import org.apache.hive.service.cli.thrift.TStatusCode; +import org.apache.http.client.CookieStore; +import org.apache.http.cookie.Cookie; public class Utils { public static final Log LOG = LogFactory.getLog(Utils.class.getName()); @@ -56,6 +58,11 @@ private static final String URI_HIVE_PREFIX = "hive2:"; + // This value is set to true by the setServiceUnavailableRetryStrategy() when the server returns 401 + static final String HIVE_SERVER2_RETRY_KEY = "hive.server2.retryserver"; + static final String HIVE_SERVER2_RETRY_TRUE = "true"; + static final String HIVE_SERVER2_RETRY_FALSE = "false"; + public static class JdbcConnectionParams { // Note on client side parameter naming convention: // Prefer using a shorter camelCase param name instead of using the same name as the @@ -98,6 +105,11 @@ // Default namespace value on ZooKeeper. // This value is used if the param "zooKeeperNamespace" is not specified in the JDBC Uri. static final String ZOOKEEPER_DEFAULT_NAMESPACE = "hiveserver2"; + static final String COOKIE_AUTH = "cookieAuth"; + static final String COOKIE_AUTH_FALSE = "false"; + static final String COOKIE_NAME = "cookieName"; + // The default value of the cookie name when CookieAuth=true + static final String DEFAULT_COOKIE_NAMES_HS2 = "hive.server2.auth"; // Non-configurable params: // Currently supports JKS keystore format @@ -560,4 +572,28 @@ } return version; } + + /** + * The function iterates through the list of cookies in the cookiestore and tries to + * match them with the cookieName. If there is a match, the cookieStore already + * has a valid cookie and the client need not send Credentials for validation purpose. 
+ * @param cookieStore The cookie Store + * @param cookieName Name of the cookie which needs to be validated + * @return true or false based on whether the client needs to send the credentials or + * not to the server. + */ + static boolean needToSendCredentials(CookieStore cookieStore, String cookieName) { + if (cookieName == null || cookieStore == null) { + return true; + } + + List cookies = cookieStore.getCookies(); + + for (Cookie c : cookies) { + if (c.getName().equals(cookieName)) { + return false; + } + } + return true; + } } Index: metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (revision 1673556) +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (working copy) @@ -5487,7 +5487,7 @@ ex = e; throw newMetaException(e); } finally { - endFunction("get_database", func != null, ex); + endFunction("get_function", func != null, ex); } return func; @@ -6060,6 +6060,11 @@ // don't doom the rest of the metastore. startLock.lock(); try { + startPauseMonitor(conf); + } catch (Throwable t) { + LOG.warn("Error starting the JVM pause monitor", t); + } + try { // Per the javadocs on Condition, do not depend on the condition alone as a start gate // since spurious wake ups are possible. while (!startedServing.get()) startCondition.await(); @@ -6078,6 +6083,18 @@ t.start(); } + private static void startPauseMonitor(HiveConf conf) throws Exception { + try { + Class.forName("org.apache.hadoop.util.JvmPauseMonitor"); + org.apache.hadoop.util.JvmPauseMonitor pauseMonitor = + new org.apache.hadoop.util.JvmPauseMonitor(conf); + pauseMonitor.start(); + } catch (Throwable t) { + LOG.warn("Could not initiate the JvmPauseMonitor thread." + + " GCs and Pauses may not be warned upon.", t); + } + } + private static void startCompactorInitiator(HiveConf conf) throws Exception { if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_INITIATOR_ON)) { MetaStoreThread initiator = Index: metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java (revision 1673556) +++ metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java (working copy) @@ -25,23 +25,33 @@ /** * The sequence of colStatNames. */ - static String[] colStatNames = new String[] { "LONG_LOW_VALUE", - "LONG_HIGH_VALUE", "DOUBLE_LOW_VALUE", "DOUBLE_HIGH_VALUE", - "BIG_DECIMAL_LOW_VALUE", "BIG_DECIMAL_HIGH_VALUE", "NUM_NULLS", - "NUM_DISTINCTS", "AVG_COL_LEN", "MAX_COL_LEN", "NUM_TRUES", "NUM_FALSES" }; - + static String[] colStatNames = new String[] { "LONG_LOW_VALUE", "LONG_HIGH_VALUE", + "DOUBLE_LOW_VALUE", "DOUBLE_HIGH_VALUE", "BIG_DECIMAL_LOW_VALUE", "BIG_DECIMAL_HIGH_VALUE", + "NUM_NULLS", "NUM_DISTINCTS", "AVG_COL_LEN", "MAX_COL_LEN", "NUM_TRUES", "NUM_FALSES", + "AVG_NDV_LONG", "AVG_NDV_DOUBLE", "AVG_NDV_DECIMAL", "SUM_NUM_DISTINCTS" }; + /** * The indexes for colstats. 
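The needToSendCredentials() helper added to Utils above lets the HTTP transport skip re-sending credentials once the server's auth cookie (by default hive.server2.auth) is already present in the client's cookie store. A hedged usage sketch, assuming Apache HttpClient 4.x (the version this patch moves the pom to); the demo class and cookie value are illustrative, and the helper body is reproduced here only for the example.

import org.apache.http.client.CookieStore;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.cookie.BasicClientCookie;

public class CookieCheckDemo {
  // Same logic as Utils.needToSendCredentials, copied for illustration.
  static boolean needToSendCredentials(CookieStore store, String cookieName) {
    if (cookieName == null || store == null) {
      return true;
    }
    for (Cookie c : store.getCookies()) {
      if (c.getName().equals(cookieName)) {
        return false; // server-issued auth cookie present; credentials can be skipped
      }
    }
    return true;
  }

  public static void main(String[] args) {
    CookieStore store = new BasicCookieStore();
    System.out.println(needToSendCredentials(store, "hive.server2.auth")); // true
    store.addCookie(new BasicClientCookie("hive.server2.auth", "token"));  // illustrative value
    System.out.println(needToSendCredentials(store, "hive.server2.auth")); // false
  }
}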
*/ - static HashMap indexMaps = new HashMap(){{ - put("long", new Integer [] {0,1,6,7}); - put("double", new Integer [] {2,3,6,7}); - put("string", new Integer [] {8,9,6,7}); - put("boolean", new Integer [] {10,11,6}); - put("binary", new Integer [] {8,9,6}); - put("decimal", new Integer [] {4,5,6,7}); - put("default", new Integer [] {0,1,2,3,4,5,6,7,8,9,10,11}); -}}; + static HashMap indexMaps = new HashMap() { + { + put("bigint", new Integer[] { 0, 1, 6, 7, 12, 15 }); + put("int", new Integer[] { 0, 1, 6, 7, 12, 15 }); + put("smallint", new Integer[] { 0, 1, 6, 7, 12, 15 }); + put("tinyint", new Integer[] { 0, 1, 6, 7, 12, 15 }); + put("timestamp", new Integer[] { 0, 1, 6, 7, 12, 15 }); + put("long", new Integer[] { 0, 1, 6, 7, 12, 15 }); + put("double", new Integer[] { 2, 3, 6, 7, 13, 15 }); + put("float", new Integer[] { 2, 3, 6, 7, 13, 15 }); + put("varchar", new Integer[] { 8, 9, 6, 7, 15 }); + put("char", new Integer[] { 8, 9, 6, 7, 15 }); + put("string", new Integer[] { 8, 9, 6, 7, 15 }); + put("boolean", new Integer[] { 10, 11, 6, 15 }); + put("binary", new Integer[] { 8, 9, 6, 15 }); + put("decimal", new Integer[] { 4, 5, 6, 7, 14, 15 }); + put("default", new Integer[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15 }); + } + }; /** * The sequence of colStatTypes. @@ -50,23 +60,24 @@ Long, Double, Decimal } - static ColStatType[] colStatTypes = new ColStatType[] { ColStatType.Long, - ColStatType.Long, ColStatType.Double, ColStatType.Double, - ColStatType.Decimal, ColStatType.Decimal, ColStatType.Long, - ColStatType.Long, ColStatType.Double, ColStatType.Long, ColStatType.Long, + static ColStatType[] colStatTypes = new ColStatType[] { ColStatType.Long, ColStatType.Long, + ColStatType.Double, ColStatType.Double, ColStatType.Decimal, ColStatType.Decimal, + ColStatType.Long, ColStatType.Long, ColStatType.Double, ColStatType.Long, ColStatType.Long, + ColStatType.Long, ColStatType.Double, ColStatType.Double, ColStatType.Double, ColStatType.Long }; /** * The sequence of aggregation function on colStats. */ static enum AggrType { - Min, Max, Sum + Min, Max, Sum, Avg } - static AggrType[] aggrTypes = new AggrType[] { AggrType.Min, AggrType.Max, - AggrType.Min, AggrType.Max, AggrType.Min, AggrType.Max, AggrType.Sum, - AggrType.Max, AggrType.Max, AggrType.Max, AggrType.Sum, AggrType.Sum }; - + static AggrType[] aggrTypes = new AggrType[] { AggrType.Min, AggrType.Max, AggrType.Min, + AggrType.Max, AggrType.Min, AggrType.Max, AggrType.Sum, AggrType.Max, AggrType.Max, + AggrType.Max, AggrType.Sum, AggrType.Sum, AggrType.Avg, AggrType.Avg, AggrType.Avg, + AggrType.Sum }; + public Object extrapolate(Object[] min, Object[] max, int colStatIndex, Map indexMap); Index: metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java (revision 1673556) +++ metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java (working copy) @@ -19,11 +19,8 @@ package org.apache.hadoop.hive.metastore; import java.math.BigDecimal; -import java.nio.ByteBuffer; import java.util.Map; -import org.apache.hadoop.hive.metastore.api.Decimal; - public class LinearExtrapolatePartStatus implements IExtrapolatePartStatus { @Override @@ -35,6 +32,15 @@ if (minInd == maxInd) { return min[0]; } + //note that recent metastore stores decimal in string. 
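The widened indexMaps above keys the relevant statistics columns by concrete column type (bigint, double, decimal, ...), with positions following the colStatNames order shown earlier (0 = LONG_LOW_VALUE ... 11 = NUM_FALSES, 12-14 = AVG_NDV_*, 15 = SUM_NUM_DISTINCTS). A small sketch of how a lookup selects those positions, including the decimal-prefix matching used later in MetaStoreDirectSql; the demo class is illustrative.

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class IndexMapDemo {
  public static void main(String[] args) {
    Map<String, Integer[]> indexMaps = new HashMap<>();
    indexMaps.put("bigint",  new Integer[] { 0, 1, 6, 7, 12, 15 });
    indexMaps.put("double",  new Integer[] { 2, 3, 6, 7, 13, 15 });
    indexMaps.put("decimal", new Integer[] { 4, 5, 6, 7, 14, 15 });

    // decimal columns carry precision/scale in the type name, so match on the prefix,
    // exactly as the extrapolation code further down in this patch does.
    String colType = "decimal(10,2)";
    Integer[] index = colType.toLowerCase().startsWith("decimal")
        ? indexMaps.get("decimal")
        : indexMaps.get(colType.toLowerCase());
    System.out.println(Arrays.toString(index)); // [4, 5, 6, 7, 14, 15]
  }
}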
+ double decimalmin= 0; + double decimalmax = 0; + if (colStatTypes[colStatIndex] == ColStatType.Decimal) { + BigDecimal bdmin = new BigDecimal(min[0].toString()); + decimalmin = bdmin.doubleValue(); + BigDecimal bdmax = new BigDecimal(max[0].toString()); + decimalmax = bdmax.doubleValue(); + } if (aggrTypes[colStatIndex] == AggrType.Max) { if (minInd < maxInd) { // right border is the max @@ -45,15 +51,9 @@ return (Double) ((Double) min[0] + (((Double) max[0] - (Double) min[0]) * (rightBorderInd - minInd) / (maxInd - minInd))); } else { - Decimal dmax = (Decimal) max[0]; - BigDecimal bdmax = new BigDecimal(dmax.toString()); - double doublemax = bdmax.doubleValue(); - Decimal dmin = (Decimal) min[0]; - BigDecimal bdmin = new BigDecimal(dmin.toString()); - double doublemin = bdmin.doubleValue(); - double ret = doublemin + (doublemax - doublemin) + double ret = decimalmin + (decimalmax - decimalmin) * (rightBorderInd - minInd) / (maxInd - minInd); - return createThriftDecimal(String.valueOf(ret)); + return String.valueOf(ret); } } else { // left border is the max @@ -62,17 +62,11 @@ * minInd / (minInd - maxInd)); } else if (colStatTypes[colStatIndex] == ColStatType.Double) { return (Double) ((Double) min[0] + ((Double) max[0] - (Double) min[0]) - * minInd / (maxInd - minInd)); + * minInd / (minInd - maxInd)); } else { - Decimal dmax = (Decimal) max[0]; - BigDecimal bdmax = new BigDecimal(dmax.toString()); - double doublemax = bdmax.doubleValue(); - Decimal dmin = (Decimal) min[0]; - BigDecimal bdmin = new BigDecimal(dmin.toString()); - double doublemin = bdmin.doubleValue(); - double ret = doublemin + (doublemax - doublemin) * minInd - / (maxInd - minInd); - return createThriftDecimal(String.valueOf(ret)); + double ret = decimalmin + (decimalmax - decimalmin) * minInd + / (minInd - maxInd); + return String.valueOf(ret); } } } else { @@ -87,16 +81,9 @@ * maxInd / (maxInd - minInd); return ret; } else { - Decimal dmax = (Decimal) max[0]; - BigDecimal bdmax = new BigDecimal(dmax.toString()); - double doublemax = bdmax.doubleValue(); - Decimal dmin = (Decimal) min[0]; - BigDecimal bdmin = new BigDecimal(dmin.toString()); - double doublemin = bdmin.doubleValue(); - double ret = doublemax - (doublemax - doublemin) * maxInd + double ret = decimalmax - (decimalmax - decimalmin) * maxInd / (maxInd - minInd); - return createThriftDecimal(String.valueOf(ret)); - + return String.valueOf(ret); } } else { // right border is the min @@ -109,24 +96,11 @@ * (rightBorderInd - maxInd) / (minInd - maxInd); return ret; } else { - Decimal dmax = (Decimal) max[0]; - BigDecimal bdmax = new BigDecimal(dmax.toString()); - double doublemax = bdmax.doubleValue(); - Decimal dmin = (Decimal) min[0]; - BigDecimal bdmin = new BigDecimal(dmin.toString()); - double doublemin = bdmin.doubleValue(); - double ret = doublemax - (doublemax - doublemin) + double ret = decimalmax - (decimalmax - decimalmin) * (rightBorderInd - maxInd) / (minInd - maxInd); - return createThriftDecimal(String.valueOf(ret)); + return String.valueOf(ret); } } } } - - private static Decimal createThriftDecimal(String s) { - BigDecimal d = new BigDecimal(s); - return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), - (short) d.scale()); - } - } Index: metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java (revision 1673556) +++ 
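LinearExtrapolatePartStatus projects a min/max statistic from the partitions that actually have stats out to the first or last partition in the range, continuing the slope between the two known points. A worked numeric example of the Max / right-border branch above, with made-up values.

// Worked example: stats exist at partition indexes minInd=2 (value 10.0) and maxInd=5
// (value 40.0); extrapolating to rightBorderInd=9 continues the slope of 10 per partition.
public class ExtrapolateDemo {
  public static void main(String[] args) {
    double minVal = 10.0, maxVal = 40.0;
    int minInd = 2, maxInd = 5, rightBorderInd = 9;
    double extrapolatedMax =
        minVal + (maxVal - minVal) * (rightBorderInd - minInd) / (maxInd - minInd);
    System.out.println(extrapolatedMax); // 80.0 = 10.0 + 10 per partition * 7 partitions
  }
}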
metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java (working copy) @@ -803,6 +803,15 @@ if (value == null) return null; return value.toString(); } + + static Double extractSqlDouble(Object obj) throws MetaException { + if (obj == null) + return null; + if (!(obj instanceof Number)) { + throw new MetaException("Expected numeric type but got " + obj.getClass().getName()); + } + return ((Number) obj).doubleValue(); + } private static String trimCommaList(StringBuilder sb) { if (sb.length() > 0) { @@ -852,6 +861,7 @@ func.apply(entry.getValue(), fields); fields = null; } + Deadline.checkTimeout(); } int rv = list.size(); query.closeAll(); @@ -1081,10 +1091,13 @@ } public AggrStats aggrColStatsForPartitions(String dbName, String tableName, - List partNames, List colNames) throws MetaException { + List partNames, List colNames, boolean useDensityFunctionForNDVEstimation) throws MetaException { long partsFound = partsFoundForPartitions(dbName, tableName, partNames, colNames); List stats = columnStatisticsObjForPartitions(dbName, - tableName, partNames, colNames, partsFound); + tableName, partNames, colNames, partsFound, useDensityFunctionForNDVEstimation); + LOG.info("useDensityFunctionForNDVEstimation = " + useDensityFunctionForNDVEstimation + + "\npartsFound = " + partsFound + "\nColumnStatisticsObj = " + + Arrays.toString(stats.toArray())); return new AggrStats(stats, partsFound); } @@ -1113,15 +1126,33 @@ return partsFound; } - private List columnStatisticsObjForPartitions( - String dbName, String tableName, List partNames, - List colNames, long partsFound) throws MetaException { + private List columnStatisticsObjForPartitions(String dbName, + String tableName, List partNames, List colNames, long partsFound, boolean useDensityFunctionForNDVEstimation) + throws MetaException { // TODO: all the extrapolation logic should be moved out of this class, - // only mechanical data retrieval should remain here. + // only mechanical data retrieval should remain here. String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", " + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), " - + "min(\"BIG_DECIMAL_LOW_VALUE\"), max(\"BIG_DECIMAL_HIGH_VALUE\"), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), " - + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\") from \"PART_COL_STATS\"" + + "min(cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal)), max(cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)), " + + "sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), " + + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), " + // The following data is used to compute a partitioned table's NDV based + // on partitions' NDV when useDensityFunctionForNDVEstimation = true. Global NDVs cannot be + // accurately derived from partition NDVs, because the domains of column values in two partitions + // can overlap. If there is no overlap then global NDV is just the sum + // of partition NDVs (UpperBound). But if there is some overlap then + // global NDV can be anywhere between the sum of partition NDVs (no overlap) + // and the largest single-partition NDV (when the domain of column values in all other + // partitions is a subset of the domain in one partition) + // (LowerBound). But under a uniform distribution, we can roughly estimate the global + // NDV by leveraging the min/max values.
+ // And, we also guarantee that the estimation makes sense by comparing it to the + // UpperBound (calculated by "sum(\"NUM_DISTINCTS\")") + // and LowerBound (calculated by "max(\"NUM_DISTINCTS\")") + + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," + + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")," + + "sum(\"NUM_DISTINCTS\")" + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "; String queryText = null; long start = 0; @@ -1133,14 +1164,13 @@ // Check if the status of all the columns of all the partitions exists // Extrapolation is not needed. if (partsFound == partNames.size()) { - queryText = commonPrefix - + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; start = doTrace ? System.nanoTime() : 0; query = pm.newQuery("javax.jdo.query.SQL", queryText); - qResult = executeWithArray(query, prepareParams( - dbName, tableName, partNames, colNames), queryText); + qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), + queryText); if (qResult == null) { query.closeAll(); return Lists.newArrayList(); @@ -1148,10 +1178,10 @@ end = doTrace ? System.nanoTime() : 0; timingTrace(doTrace, queryText, start, end); List list = ensureList(qResult); - List colStats = new ArrayList( - list.size()); + List colStats = new ArrayList(list.size()); for (Object[] row : list) { - colStats.add(prepareCSObj(row, 0)); + colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation)); + Deadline.checkTimeout(); } query.closeAll(); return colStats; @@ -1159,18 +1189,16 @@ // Extrapolation is needed for some columns. // In this case, at least a column status for a partition is missing. // We need to extrapolate this partition based on the other partitions - List colStats = new ArrayList( - colNames.size()); + List colStats = new ArrayList(colNames.size()); queryText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") " - + " from \"PART_COL_STATS\"" - + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; start = doTrace ? System.nanoTime() : 0; query = pm.newQuery("javax.jdo.query.SQL", queryText); - qResult = executeWithArray(query, prepareParams( - dbName, tableName, partNames, colNames), queryText); + qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), + queryText); end = doTrace ? 
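The extra avg((HIGH - LOW) / NUM_DISTINCTS) and sum(NUM_DISTINCTS) columns feed the density-based NDV estimate applied later in StatObjectConverter: estimate = (globalHigh - globalLow) / avgDensity, clamped between max(partition NDV) and sum(partition NDV). A hedged sketch of that clamping; the method and values below are illustrative, not the converter's exact code.

// Sketch of the density-based NDV estimate: divide the global value range by the average
// per-partition density, then clamp between the lower bound (max per-partition NDV) and
// the upper bound (sum of per-partition NDVs).
public class NdvEstimateDemo {
  static long estimateNdv(long high, long low, double avgDensity,
                          long lowerBound, long upperBound) {
    if (avgDensity == 0.0) {
      return lowerBound; // fall back to max(NUM_DISTINCTS), as without the density function
    }
    long estimation = (long) ((high - low) / avgDensity);
    if (estimation < lowerBound) {
      return lowerBound;
    } else if (estimation > upperBound) {
      return upperBound;
    }
    return estimation;
  }

  public static void main(String[] args) {
    // e.g. values 0..999 spread over 4 partitions, each partition seeing about 250 distinct values
    System.out.println(estimateNdv(999, 0, 1.0, 250, 1000)); // 999
  }
}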
System.nanoTime() : 0; timingTrace(doTrace, queryText, start, end); if (qResult == null) { @@ -1193,25 +1221,26 @@ } else { extraColumnNameTypeParts.put(colName, new String[] { colType, String.valueOf(count) }); } + Deadline.checkTimeout(); } query.closeAll(); // Extrapolation is not needed for columns noExtraColumnNames if (noExtraColumnNames.size() != 0) { - queryText = commonPrefix - + " and \"COLUMN_NAME\" in ("+ makeParams(noExtraColumnNames.size()) + ")" - + " and \"PARTITION_NAME\" in ("+ makeParams(partNames.size()) +")" - + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; + queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + + makeParams(noExtraColumnNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; start = doTrace ? System.nanoTime() : 0; query = pm.newQuery("javax.jdo.query.SQL", queryText); - qResult = executeWithArray(query, prepareParams( - dbName, tableName, partNames, noExtraColumnNames), queryText); + qResult = executeWithArray(query, + prepareParams(dbName, tableName, partNames, noExtraColumnNames), queryText); if (qResult == null) { query.closeAll(); return Lists.newArrayList(); } list = ensureList(qResult); for (Object[] row : list) { - colStats.add(prepareCSObj(row, 0)); + colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation)); + Deadline.checkTimeout(); } end = doTrace ? System.nanoTime() : 0; timingTrace(doTrace, queryText, start, end); @@ -1226,37 +1255,42 @@ } // get sum for all columns to reduce the number of queries Map> sumMap = new HashMap>(); - queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\")" + queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), sum(\"NUM_DISTINCTS\")" + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " - + " and \"COLUMN_NAME\" in (" +makeParams(extraColumnNameTypeParts.size())+ ")" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + + " and \"COLUMN_NAME\" in (" + + makeParams(extraColumnNameTypeParts.size()) + + ")" + + " and \"PARTITION_NAME\" in (" + + makeParams(partNames.size()) + + ")" + " group by \"COLUMN_NAME\""; start = doTrace ? System.nanoTime() : 0; query = pm.newQuery("javax.jdo.query.SQL", queryText); List extraColumnNames = new ArrayList(); extraColumnNames.addAll(extraColumnNameTypeParts.keySet()); - qResult = executeWithArray(query, prepareParams( - dbName, tableName, partNames, extraColumnNames), queryText); + qResult = executeWithArray(query, + prepareParams(dbName, tableName, partNames, extraColumnNames), queryText); if (qResult == null) { query.closeAll(); return Lists.newArrayList(); } list = ensureList(qResult); // see the indexes for colstats in IExtrapolatePartStatus - Integer[] sumIndex = new Integer[] { 6, 10, 11 }; + Integer[] sumIndex = new Integer[] { 6, 10, 11, 15 }; for (Object[] row : list) { Map indexToObject = new HashMap(); for (int ind = 1; ind < row.length; ind++) { indexToObject.put(sumIndex[ind - 1], row[ind]); } + // row[0] is the column name sumMap.put((String) row[0], indexToObject); + Deadline.checkTimeout(); } end = doTrace ? 
System.nanoTime() : 0; timingTrace(doTrace, queryText, start, end); query.closeAll(); - for (Map.Entry entry : extraColumnNameTypeParts - .entrySet()) { + for (Map.Entry entry : extraColumnNameTypeParts.entrySet()) { Object[] row = new Object[IExtrapolatePartStatus.colStatNames.length + 2]; String colName = entry.getKey(); String colType = entry.getValue()[0]; @@ -1265,12 +1299,20 @@ row[0] = colName; // fill in coltype row[1] = colType; - // use linear extrapolation. more complicated one can be added in the future. + // use linear extrapolation. more complicated one can be added in the + // future. IExtrapolatePartStatus extrapolateMethod = new LinearExtrapolatePartStatus(); // fill in colstatus - Integer[] index = IExtrapolatePartStatus.indexMaps.get(colType - .toLowerCase()); - //if the colType is not the known type, long, double, etc, then get all index. + Integer[] index = null; + boolean decimal = false; + if (colType.toLowerCase().startsWith("decimal")) { + index = IExtrapolatePartStatus.indexMaps.get("decimal"); + decimal = true; + } else { + index = IExtrapolatePartStatus.indexMaps.get(colType.toLowerCase()); + } + // if the colType is not the known type, long, double, etc, then get + // all index. if (index == null) { index = IExtrapolatePartStatus.indexMaps.get("default"); } @@ -1285,20 +1327,27 @@ Long val = extractSqlLong(o); row[2 + colStatIndex] = (Long) (val / sumVal * (partNames.size())); } - } else { + } else if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Min + || IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Max) { // if the aggregation type is min/max, we extrapolate from the // left/right borders - queryText = "select \"" - + colStatName - + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" - + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" - + " and \"COLUMN_NAME\" = ?" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" - + " order by \'" + colStatName + "\'"; + if (!decimal) { + queryText = "select \"" + colStatName + + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + + " order by \"" + colStatName + "\""; + } else { + queryText = "select \"" + colStatName + + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + + " order by cast(\"" + colStatName + "\" as decimal)"; + } start = doTrace ? System.nanoTime() : 0; query = pm.newQuery("javax.jdo.query.SQL", queryText); - qResult = executeWithArray(query, prepareParams( - dbName, tableName, partNames, Arrays.asList(colName)), queryText); + qResult = executeWithArray(query, + prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText); if (qResult == null) { query.closeAll(); return Lists.newArrayList(); @@ -1312,12 +1361,39 @@ if (min[0] == null || max[0] == null) { row[2 + colStatIndex] = null; } else { - row[2 + colStatIndex] = extrapolateMethod.extrapolate(min, max, - colStatIndex, indexMap); + row[2 + colStatIndex] = extrapolateMethod.extrapolate(min, max, colStatIndex, + indexMap); } + } else { + // if the aggregation type is avg, we use the average on the + // existing ones. 
+ queryText = "select " + + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," + + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")" + + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\""; + start = doTrace ? System.nanoTime() : 0; + query = pm.newQuery("javax.jdo.query.SQL", queryText); + qResult = executeWithArray(query, + prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText); + if (qResult == null) { + query.closeAll(); + return Lists.newArrayList(); + } + fqr = (ForwardQueryResult) qResult; + Object[] avg = (Object[]) (fqr.get(0)); + // colStatIndex=12,13,14 respond to "AVG_LONG", "AVG_DOUBLE", + // "AVG_DECIMAL" + row[2 + colStatIndex] = avg[colStatIndex - 12]; + end = doTrace ? System.nanoTime() : 0; + timingTrace(doTrace, queryText, start, end); + query.closeAll(); } } - colStats.add(prepareCSObj(row, 0)); + colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation)); + Deadline.checkTimeout(); } } return colStats; @@ -1335,6 +1411,17 @@ return cso; } + private ColumnStatisticsObj prepareCSObjWithAdjustedNDV(Object[] row, int i, + boolean useDensityFunctionForNDVEstimation) throws MetaException { + ColumnStatisticsData data = new ColumnStatisticsData(); + ColumnStatisticsObj cso = new ColumnStatisticsObj((String) row[i++], (String) row[i++], data); + Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++], declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++], avgLong = row[i++], avgDouble = row[i++], avgDecimal = row[i++], sumDist = row[i++]; + StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data, llow, lhigh, dlow, dhigh, + declow, dechigh, nulls, dist, avglen, maxlen, trues, falses, avgLong, avgDouble, + avgDecimal, sumDist, useDensityFunctionForNDVEstimation); + return cso; + } + private Object[] prepareParams(String dbName, String tableName, List partNames, List colNames) throws MetaException { @@ -1389,6 +1476,7 @@ } lastPartName = partName; from = i; + Deadline.checkTimeout(); } timingTrace(doTrace, queryText, start, queryTime); @@ -1416,6 +1504,7 @@ csd.setLastAnalyzed(extractSqlLong(laObj)); } csos.add(prepareCSObj(row, offset)); + Deadline.checkTimeout(); } result.setStatsObj(csos); return result; Index: metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java (revision 1673556) +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java (working copy) @@ -620,10 +620,10 @@ */ static public boolean validateColumnType(String type) { int last = 0; - boolean lastAlphaDigit = Character.isLetterOrDigit(type.charAt(last)); + boolean lastAlphaDigit = isValidTypeChar(type.charAt(last)); for (int i = 1; i <= type.length(); i++) { if (i == type.length() - || Character.isLetterOrDigit(type.charAt(i)) != lastAlphaDigit) { + || isValidTypeChar(type.charAt(i)) != lastAlphaDigit) { String token = type.substring(last, i); last = i; if (!hiveThriftTypeMap.contains(token)) { @@ -635,6 +635,10 @@ return true; } + 
private static boolean isValidTypeChar(char c) { + return Character.isLetterOrDigit(c) || c == '_'; + } + public static String validateSkewedColNames(List cols) { if (null == cols) { return null; @@ -720,6 +724,12 @@ "timestamp"); typeToThriftTypeMap.put( org.apache.hadoop.hive.serde.serdeConstants.DECIMAL_TYPE_NAME, "decimal"); + typeToThriftTypeMap.put( + org.apache.hadoop.hive.serde.serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME, + org.apache.hadoop.hive.serde.serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME); + typeToThriftTypeMap.put( + org.apache.hadoop.hive.serde.serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME, + org.apache.hadoop.hive.serde.serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME); } static Set hiveThriftTypeMap; //for validation Index: metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (revision 1673556) +++ metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (working copy) @@ -1817,6 +1817,7 @@ } for (MPartition mp : src) { dest.add(convertToPart(mp)); + Deadline.checkTimeout(); } return dest; } @@ -1826,6 +1827,7 @@ List parts = new ArrayList(mparts.size()); for (MPartition mp : mparts) { parts.add(convertToPart(dbName, tblName, mp)); + Deadline.checkTimeout(); } return parts; } @@ -3121,10 +3123,6 @@ MTable origTable = mIndex.getOrigTable(); MTable indexTable = mIndex.getIndexTable(); - String[] qualified = MetaStoreUtils.getQualifiedName( - origTable.getDatabase().getName(), indexTable.getTableName()); - String indexTableName = qualified[0] + "." + qualified[1]; - return new Index( mIndex.getIndexName(), mIndex.getIndexHandlerClass(), @@ -3132,7 +3130,7 @@ origTable.getTableName(), mIndex.getCreateTime(), mIndex.getLastAccessTime(), - indexTableName, + indexTable.getTableName(), convertToStorageDescriptor(mIndex.getSd()), mIndex.getParameters(), mIndex.getDeferredRebuild()); @@ -6053,6 +6051,7 @@ desc.setLastAnalyzed(mStat.getLastAnalyzed()); } statObjs.add(StatObjectConverter.getTableColumnStatisticsObj(mStat)); + Deadline.checkTimeout(); } return new ColumnStatistics(desc, statObjs); } @@ -6101,6 +6100,7 @@ } curList.add(StatObjectConverter.getPartitionColumnStatisticsObj(mStatsObj)); lastPartName = partName; + Deadline.checkTimeout(); } return result; } @@ -6111,12 +6111,13 @@ @Override public AggrStats get_aggr_stats_for(String dbName, String tblName, final List partNames, final List colNames) throws MetaException, NoSuchObjectException { + final boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(getConf(), HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION); return new GetHelper(dbName, tblName, true, false) { @Override protected AggrStats getSqlResult(GetHelper ctx) throws MetaException { return directSql.aggrColStatsForPartitions(dbName, tblName, partNames, - colNames); + colNames, useDensityFunctionForNDVEstimation); } @Override protected AggrStats getJdoResult(GetHelper ctx) Index: metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java (revision 1673556) +++ metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java (working copy) @@ -24,13 +24,13 @@ import java.lang.reflect.Method; import java.lang.reflect.Proxy; import java.lang.reflect.UndeclaredThrowableException; +import 
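Counting '_' as a type character keeps multi-word type names such as interval_year_month as a single token when validateColumnType() splits the type string into alternating runs of type characters and separators. A sketch of that tokenization behaviour; the loop below updates the run flag explicitly and is not a verbatim copy of the metastore code.

import java.util.ArrayList;
import java.util.List;

public class TypeTokenDemo {
  static boolean isValidTypeChar(char c) {
    return Character.isLetterOrDigit(c) || c == '_';
  }

  // Split a type string into maximal runs of type characters and separators.
  static List<String> tokens(String type) {
    List<String> out = new ArrayList<>();
    int last = 0;
    boolean lastAlphaDigit = isValidTypeChar(type.charAt(last));
    for (int i = 1; i <= type.length(); i++) {
      if (i == type.length() || isValidTypeChar(type.charAt(i)) != lastAlphaDigit) {
        out.add(type.substring(last, i));
        last = i;
        if (i < type.length()) {
          lastAlphaDigit = isValidTypeChar(type.charAt(i));
        }
      }
    }
    return out;
  }

  public static void main(String[] args) {
    // With '_' accepted, the interval name stays one token; without it, validation would
    // see "interval", "_", "year", "_", "month" and reject the column type.
    System.out.println(tokens("interval_year_month")); // [interval_year_month]
    System.out.println(tokens("map<string,int>"));     // [map, <, string, ,, int, >]
  }
}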
java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.TApplicationException; import org.apache.thrift.TException; @@ -51,14 +51,17 @@ private final IMetaStoreClient base; private final int retryLimit; private final long retryDelaySeconds; + private final Map metaCallTimeMap; + protected RetryingMetaStoreClient(HiveConf hiveConf, HiveMetaHookLoader hookLoader, - Class msClientClass) throws MetaException { + Map metaCallTimeMap, Class msClientClass) throws MetaException { this.retryLimit = hiveConf.getIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES); this.retryDelaySeconds = hiveConf.getTimeVar( HiveConf.ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY, TimeUnit.SECONDS); + this.metaCallTimeMap = metaCallTimeMap; reloginExpiringKeytabUser(); this.base = MetaStoreUtils.newInstance(msClientClass, new Class[] { @@ -67,14 +70,20 @@ public static IMetaStoreClient getProxy(HiveConf hiveConf, HiveMetaHookLoader hookLoader, String mscClassName) throws MetaException { + return getProxy(hiveConf, hookLoader, null, mscClassName); + } - Class baseClass = (Class) - MetaStoreUtils.getClass(mscClassName); + public static IMetaStoreClient getProxy(HiveConf hiveConf, HiveMetaHookLoader hookLoader, + Map metaCallTimeMap, String mscClassName) throws MetaException { - RetryingMetaStoreClient handler = new RetryingMetaStoreClient(hiveConf, hookLoader, baseClass); + Class baseClass = (Class) MetaStoreUtils + .getClass(mscClassName); - return (IMetaStoreClient) Proxy.newProxyInstance(RetryingMetaStoreClient.class.getClassLoader(), - baseClass.getInterfaces(), handler); + RetryingMetaStoreClient handler = new RetryingMetaStoreClient(hiveConf, hookLoader, + metaCallTimeMap, baseClass); + + return (IMetaStoreClient) Proxy.newProxyInstance( + RetryingMetaStoreClient.class.getClassLoader(), baseClass.getInterfaces(), handler); } @Override @@ -88,7 +97,15 @@ if(retriesMade > 0){ base.reconnect(); } - ret = method.invoke(base, args); + if (metaCallTimeMap == null) { + ret = method.invoke(base, args); + } else { + // need to capture the timing + long startTime = System.currentTimeMillis(); + ret = method.invoke(base, args); + long timeTaken = System.currentTimeMillis() - startTime; + addMethodTime(method, timeTaken); + } break; } catch (UndeclaredThrowableException e) { throw e.getCause(); @@ -116,7 +133,31 @@ return ret; } + private void addMethodTime(Method method, long timeTaken) { + String methodStr = getMethodString(method); + Long curTime = metaCallTimeMap.get(methodStr); + if (curTime != null) { + timeTaken += curTime; + } + metaCallTimeMap.put(methodStr, timeTaken); + } + /** + * @param method + * @return String representation with arg types. 
eg getDatabase_(String, ) + */ + private String getMethodString(Method method) { + StringBuilder methodSb = new StringBuilder(method.getName()); + methodSb.append("_("); + for (Class paramClass : method.getParameterTypes()) { + methodSb.append(paramClass.getSimpleName()); + methodSb.append(", "); + } + methodSb.append(")"); + return methodSb.toString(); + } + + /** * Relogin if login user is logged in using keytab * Relogin is actually done by ugi code only if sufficient time has passed * A no-op if kerberos security is not enabled Index: metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java (revision 1673556) +++ metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java (working copy) @@ -476,6 +476,133 @@ } } + public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, + Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, + Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses, + Object avgLong, Object avgDouble, Object avgDecimal, Object sumDist, + boolean useDensityFunctionForNDVEstimation) throws MetaException { + colType = colType.toLowerCase(); + if (colType.equals("boolean")) { + BooleanColumnStatsData boolStats = new BooleanColumnStatsData(); + boolStats.setNumFalses(MetaStoreDirectSql.extractSqlLong(falses)); + boolStats.setNumTrues(MetaStoreDirectSql.extractSqlLong(trues)); + boolStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + data.setBooleanStats(boolStats); + } else if (colType.equals("string") || colType.startsWith("varchar") + || colType.startsWith("char")) { + StringColumnStatsData stringStats = new StringColumnStatsData(); + stringStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + stringStats.setAvgColLen((Double) avglen); + stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); + stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist)); + data.setStringStats(stringStats); + } else if (colType.equals("binary")) { + BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); + binaryStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + binaryStats.setAvgColLen((Double) avglen); + binaryStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen)); + data.setBinaryStats(binaryStats); + } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") + || colType.equals("tinyint") || colType.equals("timestamp")) { + LongColumnStatsData longStats = new LongColumnStatsData(); + longStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + if (lhigh != null) { + longStats.setHighValue(MetaStoreDirectSql.extractSqlLong(lhigh)); + } + if (llow != null) { + longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow)); + } + long lowerBound = MetaStoreDirectSql.extractSqlLong(dist); + long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist); + if (useDensityFunctionForNDVEstimation && lhigh != null && llow != null && avgLong != null + && MetaStoreDirectSql.extractSqlDouble(avgLong) != 0.0) { + // We have estimation, lowerbound and higherbound. We use estimation if + // it is between lowerbound and higherbound. 
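With a non-null metaCallTimeMap, the retrying proxy now records wall-clock time per metastore method and accumulates it under a key shaped like getDatabase_(String, ). A minimal sketch of the same accumulation around a reflective call; the ConcurrentHashMap and merge() usage are assumptions for the sketch, since the patch itself only shows a plain Map with an explicit get/put.

import java.lang.reflect.Method;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class CallTimingDemo {
  // Assumed thread-safe map; the patch leaves the concrete Map type to the caller.
  private final Map<String, Long> metaCallTimeMap = new ConcurrentHashMap<>();

  Object timedInvoke(Object target, Method method, Object[] args) throws Exception {
    long start = System.currentTimeMillis();
    Object ret = method.invoke(target, args);
    long taken = System.currentTimeMillis() - start;
    // merge() performs the read-add-put that addMethodTime() spells out explicitly.
    metaCallTimeMap.merge(methodKey(method), taken, Long::sum);
    return ret;
  }

  // Produces keys like "getDatabase_(String, )", the same shape as getMethodString() above.
  static String methodKey(Method method) {
    StringBuilder sb = new StringBuilder(method.getName()).append("_(");
    for (Class<?> p : method.getParameterTypes()) {
      sb.append(p.getSimpleName()).append(", ");
    }
    return sb.append(")").toString();
  }
}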
+ long estimation = MetaStoreDirectSql + .extractSqlLong((MetaStoreDirectSql.extractSqlLong(lhigh) - MetaStoreDirectSql + .extractSqlLong(llow)) / MetaStoreDirectSql.extractSqlDouble(avgLong)); + if (estimation < lowerBound) { + longStats.setNumDVs(lowerBound); + } else if (estimation > higherBound) { + longStats.setNumDVs(higherBound); + } else { + longStats.setNumDVs(estimation); + } + } else { + longStats.setNumDVs(lowerBound); + } + data.setLongStats(longStats); + } else if (colType.equals("double") || colType.equals("float")) { + DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + doubleStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + if (dhigh != null) { + doubleStats.setHighValue((Double) dhigh); + } + if (dlow != null) { + doubleStats.setLowValue((Double) dlow); + } + long lowerBound = MetaStoreDirectSql.extractSqlLong(dist); + long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist); + if (useDensityFunctionForNDVEstimation && dhigh != null && dlow != null && avgDouble != null + && MetaStoreDirectSql.extractSqlDouble(avgDouble) != 0.0) { + long estimation = MetaStoreDirectSql + .extractSqlLong((MetaStoreDirectSql.extractSqlLong(dhigh) - MetaStoreDirectSql + .extractSqlLong(dlow)) / MetaStoreDirectSql.extractSqlDouble(avgDouble)); + if (estimation < lowerBound) { + doubleStats.setNumDVs(lowerBound); + } else if (estimation > higherBound) { + doubleStats.setNumDVs(higherBound); + } else { + doubleStats.setNumDVs(estimation); + } + } else { + doubleStats.setNumDVs(lowerBound); + } + data.setDoubleStats(doubleStats); + } else if (colType.startsWith("decimal")) { + DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); + decimalStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls)); + Decimal low = null; + Decimal high = null; + BigDecimal blow = null; + BigDecimal bhigh = null; + if (dechigh instanceof BigDecimal) { + bhigh = (BigDecimal) dechigh; + high = new Decimal(ByteBuffer.wrap(bhigh.unscaledValue().toByteArray()), + (short) bhigh.scale()); + } else if (dechigh instanceof String) { + bhigh = new BigDecimal((String) dechigh); + high = createThriftDecimal((String) dechigh); + } + decimalStats.setHighValue(high); + if (declow instanceof BigDecimal) { + blow = (BigDecimal) declow; + low = new Decimal(ByteBuffer.wrap(blow.unscaledValue().toByteArray()), (short) blow.scale()); + } else if (dechigh instanceof String) { + blow = new BigDecimal((String) declow); + low = createThriftDecimal((String) declow); + } + decimalStats.setLowValue(low); + long lowerBound = MetaStoreDirectSql.extractSqlLong(dist); + long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist); + if (useDensityFunctionForNDVEstimation && dechigh != null && declow != null && avgDecimal != null + && MetaStoreDirectSql.extractSqlDouble(avgDecimal) != 0.0) { + long estimation = MetaStoreDirectSql.extractSqlLong(MetaStoreDirectSql.extractSqlLong(bhigh + .subtract(blow).floatValue() / MetaStoreDirectSql.extractSqlDouble(avgDecimal))); + if (estimation < lowerBound) { + decimalStats.setNumDVs(lowerBound); + } else if (estimation > higherBound) { + decimalStats.setNumDVs(higherBound); + } else { + decimalStats.setNumDVs(estimation); + } + } else { + decimalStats.setNumDVs(lowerBound); + } + data.setDecimalStats(decimalStats); + } + } + private static Decimal createThriftDecimal(String s) { BigDecimal d = new BigDecimal(s); return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short)d.scale()); @@ -484,4 +611,5 @@ private static String 
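fillColumnStatisticsData() accepts the decimal low/high either as a BigDecimal (from the cast in the direct-SQL query) or as the string form newer metastores store, and packs it into the Thrift Decimal as unscaled bytes plus a scale. A self-contained sketch of that round trip using only java.math, mirroring what createThriftDecimal and createJdoDecimalString do.

import java.math.BigDecimal;
import java.math.BigInteger;

public class DecimalRoundTrip {
  public static void main(String[] args) {
    BigDecimal original = new BigDecimal("123.45");
    byte[] unscaled = original.unscaledValue().toByteArray(); // bytes of 12345
    short scale = (short) original.scale();                   // 2
    // createThriftDecimal packs (unscaled, scale) into the Thrift Decimal;
    // createJdoDecimalString unpacks it back into the string form below.
    BigDecimal restored = new BigDecimal(new BigInteger(unscaled), scale);
    System.out.println(restored); // 123.45
  }
}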
createJdoDecimalString(Decimal d) { return new BigDecimal(new BigInteger(d.getUnscaled()), d.getScale()).toString(); } + } Index: metastore/src/java/org/apache/hadoop/hive/metastore/events/AddPartitionEvent.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/events/AddPartitionEvent.java (revision 1673556) +++ metastore/src/java/org/apache/hadoop/hive/metastore/events/AddPartitionEvent.java (working copy) @@ -61,18 +61,18 @@ return table; } - /** - * @return List of partitions. - */ - public List getPartitions() { - return partitions; - } + // Note : List getPartitions() removed with HIVE-9609 because it will result in OOM errors with large add_partitions. + /** * @return Iterator for partitions. */ public Iterator getPartitionIterator() { - return partitionSpecProxy == null ? null : partitionSpecProxy.getPartitionIterator(); + if (partitions != null){ + return partitions.iterator(); + } else { + return partitionSpecProxy == null ? null : partitionSpecProxy.getPartitionIterator(); + } } } Index: packaging/src/main/assembly/bin.xml =================================================================== --- packaging/src/main/assembly/bin.xml (revision 1673556) +++ packaging/src/main/assembly/bin.xml (working copy) @@ -146,6 +146,7 @@ ${project.parent.basedir}/conf *.template + ivysettings.xml conf Index: pom.xml =================================================================== --- pom.xml (revision 1673556) +++ pom.xml (working copy) @@ -124,8 +124,9 @@ 0.98.9-hadoop1 0.98.9-hadoop2 - 4.2.5 - 4.2.5 + 4.4 + 4.4 + 2.4.0 1.9.2 0.3.2 5.5.1 @@ -147,7 +148,7 @@ 1.9.5 2.0.0-M5 4.0.23.Final - 1.6.0rc3 + 1.6.0rc6 0.12.0 2.5.0 1.0.1 @@ -1092,6 +1093,16 @@ org.apache.hadoop hadoop-common ${hadoop-23.version} + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpclient + + org.apache.hadoop Index: ql/pom.xml =================================================================== --- ql/pom.xml (revision 1673556) +++ ql/pom.xml (working copy) @@ -163,6 +163,11 @@ ${libfb303.version} + org.apache.ivy + ivy + ${ivy.version} + + org.apache.thrift libthrift ${libthrift.version} Index: ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt =================================================================== --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt (revision 1673556) +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt (working copy) @@ -83,25 +83,27 @@ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputVector[0] = vector1[0] vector2[0]; } else if (inputColVector1.isRepeating) { + final vector1Value = vector1[0]; if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] vector2[i]; + outputVector[i] = vector1Value vector2[i]; } } else { for(int i = 0; i != n; i++) { - outputVector[i] = vector1[0] vector2[i]; + outputVector[i] = vector1Value vector2[i]; } } } else if (inputColVector2.isRepeating) { + final vector2Value = vector2[0]; if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] vector2[0]; + outputVector[i] = vector1[i] vector2Value; } } else { for(int i = 0; i != n; i++) { - outputVector[i] = vector1[i] vector2[0]; + outputVector[i] = vector1[i] vector2Value; } } } else { Index: ql/src/java/org/apache/hadoop/hive/ql/Context.java =================================================================== --- 
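The vectorization template change hoists the repeating operand into a final local so the generated loop reads vector[0] once instead of on every iteration. The template's type and operator placeholders are filled in at code-generation time; below is a hedged rendering for a long '+' expression with illustrative names.

// Hedged rendering of the generated inner loop for a long "+" expression, showing the
// hoisted final local introduced by the template change. Names are illustrative.
public class ColumnArithmeticDemo {
  static void addRepeatingLeft(long[] vector1, long[] vector2, long[] outputVector,
                               int[] sel, boolean selectedInUse, int n) {
    final long vector1Value = vector1[0];  // hoisted once; vector1 is the "repeating" side
    if (selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        outputVector[i] = vector1Value + vector2[i];
      }
    } else {
      for (int i = 0; i != n; i++) {
        outputVector[i] = vector1Value + vector2[i];
      }
    }
  }
}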
ql/src/java/org/apache/hadoop/hive/ql/Context.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/Context.java (working copy) @@ -23,6 +23,7 @@ import java.io.IOException; import java.net.URI; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -84,6 +85,7 @@ private final Configuration conf; protected int pathid = 10000; protected boolean explain = false; + protected String cboInfo; protected boolean explainLogical = false; protected String cmd = ""; // number of previous attempts @@ -695,4 +697,13 @@ public AcidUtils.Operation getAcidOperation() { return acidOperation; } + + public String getCboInfo() { + return cboInfo; + } + + public void setCboInfo(String cboInfo) { + this.cboInfo = cboInfo; + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/Driver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java (working copy) @@ -63,7 +63,6 @@ import org.apache.hadoop.hive.ql.hooks.PostExecute; import org.apache.hadoop.hive.ql.hooks.PreExecute; import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.hive.ql.hooks.Redactor; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.lockmgr.HiveLock; import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode; @@ -485,7 +484,6 @@ + explainOutput); } } - return 0; } catch (Exception e) { ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage()); @@ -508,10 +506,19 @@ return error.getErrorCode(); } finally { perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE); + dumpMetaCallTimingWithoutEx("compilation"); restoreSession(queryState); } } + private void dumpMetaCallTimingWithoutEx(String phase) { + try { + Hive.get().dumpAndClearMetaCallTiming(phase); + } catch (HiveException he) { + LOG.warn("Caught exception attempting to write metadata call information " + he, he); + } + } + /** * Returns EXPLAIN EXTENDED output for a semantically * analyzed query. @@ -1182,7 +1189,6 @@ return createProcessorResponse(ret); } } - ret = execute(); if (ret != 0) { //if needRequireLock is false, the release here will do nothing because there is no lock @@ -1307,7 +1313,6 @@ public int execute() throws CommandNeedRetryException { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE); - boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME)); int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH); @@ -1318,6 +1323,9 @@ try { LOG.info("Starting command: " + queryStr); + // compile and execute can get called from different threads in case of HS2 + // so clear timing in this thread's Hive object before proceeding. 
+ Hive.get().clearMetaCallTiming(); plan.setStarted(); @@ -1548,6 +1556,7 @@ if (noName) { conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, ""); } + dumpMetaCallTimingWithoutEx("execution"); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_EXECUTE); Map stats = SessionState.get().getMapRedStats(); Index: ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (working copy) @@ -429,6 +429,7 @@ "Alter table partition type {0} does not support cascade", true), DROP_NATIVE_FUNCTION(10301, "Cannot drop native function"), + UPDATE_CANNOT_UPDATE_BUCKET_VALUE(10302, "Updating values of bucketing columns is not supported. Column {0}.", true), //========================== 20000 range starts here ========================// SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."), @@ -443,6 +444,9 @@ "is controlled by hive.exec.max.dynamic.partitions and hive.exec.max.dynamic.partitions.pernode. "), PARTITION_SCAN_LIMIT_EXCEEDED(20005, "Number of partitions scanned (={0}) on table {1} exceeds limit" + " (={2}). This is controlled by hive.limit.query.max.table.partition.", true), + OP_NOT_ALLOWED_IN_AUTOCOMMIT(20006, "Operation {0} is not allowed when autoCommit=true.", true),//todo: better SQLState? + OP_NOT_ALLOWED_IN_TXN(20007, "Operation {0} is not allowed in a transaction. TransactionID={1}.", true), + OP_NOT_ALLOWED_WITHOUT_TXN(2008, "Operation {0} is not allowed since autoCommit=false and there is no active transaction", true), //========================== 30000 range starts here ========================// STATSPUBLISHER_NOT_OBTAINED(30000, "StatsPublisher cannot be obtained. 
" + @@ -508,7 +512,7 @@ static { for (ErrorMsg errorMsg : values()) { if (errorMsg.format != null) { - String pattern = errorMsg.mesg.replaceAll("\\{.*\\}", ".*"); + String pattern = errorMsg.mesg.replaceAll("\\{[0-9]+\\}", ".*"); formatToErrorMsgMap.put(Pattern.compile("^" + pattern + "$"), errorMsg); } else { mesgToErrorMsgMap.put(errorMsg.getMsg().trim(), errorMsg); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (working copy) @@ -691,11 +691,11 @@ Byte alias = order[i]; AbstractRowContainer> alw = storage[alias]; - if (alw.rowCount() != 1) { + if (!alw.isSingleRow()) { allOne = false; } - if (alw.rowCount() == 0) { + if (!alw.hasRows()) { alw.addRow(dummyObj[i]); hasNulls = true; } else if (condn[i].getPreserved()) { @@ -721,16 +721,16 @@ AbstractRowContainer> alw = storage[alias]; if (noOuterJoin) { - if (alw.rowCount() == 0) { + if (!alw.hasRows()) { return; - } else if (alw.rowCount() > 1) { + } else if (!alw.isSingleRow()) { mayHasMoreThanOne = true; } } else { - if (alw.rowCount() == 0) { + if (!alw.hasRows()) { hasEmpty = true; alw.addRow(dummyObj[i]); - } else if (!hasEmpty && alw.rowCount() == 1) { + } else if (!hasEmpty && alw.isSingleRow()) { if (hasAnyFiltered(alias, alw.rowIter().first())) { hasEmpty = true; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java (working copy) @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.tez.TezContext; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -78,6 +79,8 @@ transient List otherKey = null; transient List values = null; transient RecordSource[] sources; + transient WritableComparator[][] keyComparators; + transient List> originalParents = new ArrayList>(); @@ -105,7 +108,12 @@ nextKeyWritables = new ArrayList[maxAlias]; fetchDone = new boolean[maxAlias]; foundNextKeyGroup = new boolean[maxAlias]; + keyComparators = new WritableComparator[maxAlias][]; + for (Entry> entry : conf.getKeys().entrySet()) { + keyComparators[entry.getKey().intValue()] = new WritableComparator[entry.getValue().size()]; + } + int bucketSize; int oldVar = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE); @@ -279,7 +287,7 @@ result[pos] = -1; continue; } - result[pos] = compareKeys(key, smallestOne); + result[pos] = compareKeys(pos, key, smallestOne); if (result[pos] < 0) { smallestOne = key; } @@ -383,7 +391,7 @@ if (candidateStorage[pos] == null) { continue; } - if (this.candidateStorage[pos].rowCount() > 0) { + if (this.candidateStorage[pos].hasRows()) { dataInCache = true; break; } @@ -411,14 +419,16 @@ this.nextGroupStorage[t] = oldRowContainer; } + @SuppressWarnings("rawtypes") private boolean processKey(byte alias, List key) throws HiveException { List keyWritable = keyWritables[alias]; if (keyWritable == null) 
{ // the first group. keyWritables[alias] = key; + keyComparators[alias] = new WritableComparator[key.size()]; return false; } else { - int cmp = compareKeys(key, keyWritable); + int cmp = compareKeys(alias, key, keyWritable); if (cmp != 0) { nextKeyWritables[alias] = key; return true; @@ -428,30 +438,42 @@ } @SuppressWarnings("rawtypes") - private int compareKeys(List k1, List k2) { - int ret = 0; + private int compareKeys(byte alias, List k1, List k2) { + final WritableComparator[] comparators = keyComparators[alias]; // join keys have difference sizes? - ret = k1.size() - k2.size(); - if (ret != 0) { - return ret; + if (k1.size() != k2.size()) { + return k1.size() - k2.size(); } - for (int i = 0; i < k1.size(); i++) { + if (comparators.length == 0) { + // cross-product - no keys really + return 0; + } + + if (comparators.length > 1) { + // rare case + return compareKeysMany(comparators, k1, k2); + } else { + return compareKey(comparators, 0, + (WritableComparable) k1.get(0), + (WritableComparable) k2.get(0), + nullsafes != null ? nullsafes[0]: false); + } + } + + @SuppressWarnings("rawtypes") + private int compareKeysMany(WritableComparator[] comparators, + final List k1, + final List k2) { + // invariant: k1.size == k2.size + int ret = 0; + final int size = k1.size(); + for (int i = 0; i < size; i++) { WritableComparable key_1 = (WritableComparable) k1.get(i); WritableComparable key_2 = (WritableComparable) k2.get(i); - if (key_1 == null && key_2 == null) { - if (nullsafes != null && nullsafes[i]) { - continue; - } else { - return -1; - } - } else if (key_1 == null) { - return -1; - } else if (key_2 == null) { - return 1; - } - ret = WritableComparator.get(key_1.getClass()).compare(key_1, key_2); + ret = compareKey(comparators, i, key_1, key_2, + nullsafes != null ? 
nullsafes[i] : false); if (ret != 0) { return ret; } @@ -459,6 +481,30 @@ return ret; } + @SuppressWarnings("rawtypes") + private int compareKey(final WritableComparator comparators[], final int pos, + final WritableComparable key_1, + final WritableComparable key_2, + final boolean nullsafe) { + + if (key_1 == null && key_2 == null) { + if (nullsafe) { + return 0; + } else { + return -1; + } + } else if (key_1 == null) { + return -1; + } else if (key_2 == null) { + return 1; + } + + if (comparators[pos] == null) { + comparators[pos] = WritableComparator.get(key_1.getClass()); + } + return comparators[pos].compare(key_1, key_2); + } + @SuppressWarnings("unchecked") private List mergeJoinComputeKeys(Object row, Byte alias) throws HiveException { if ((joinKeysObjectInspectors != null) && (joinKeysObjectInspectors[alias] != null)) { @@ -501,12 +547,13 @@ if (parent == null) { throw new HiveException("No valid parents."); } - Map dummyOps = parent.getTagToOperatorTree(); + Map dummyOps = + ((TezContext) (MapredContext.get())).getDummyOpsMap(); for (Entry connectOp : dummyOps.entrySet()) { if (connectOp.getValue().getChildOperators() == null - || connectOp.getValue().getChildOperators().isEmpty()) { - parentOperators.add(connectOp.getKey(), connectOp.getValue()); - connectOp.getValue().getChildOperators().add(this); + || connectOp.getValue().getChildOperators().isEmpty()) { + parentOperators.add(connectOp.getKey(), connectOp.getValue()); + connectOp.getValue().getChildOperators().add(this); } } super.initializeLocalWork(hconf); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (working copy) @@ -3404,13 +3404,13 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Partition part) throws HiveException { - List oldCols = (part == null ? tbl.getCols() : part.getCols()); - StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) { tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName())); tbl.setTableName(Utilities.getTableName(alterTbl.getNewName())); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCOLS) { + List oldCols = (part == null ? tbl.getCols() : part.getCols()); + StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); List newCols = alterTbl.getNewCols(); String serializationLib = sd.getSerdeInfo().getSerializationLib(); if (serializationLib.equals( @@ -3437,6 +3437,8 @@ sd.setCols(oldCols); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAMECOLUMN) { + List oldCols = (part == null ? tbl.getCols() : part.getCols()); + StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); List newCols = new ArrayList(); Iterator iterOldCols = oldCols.iterator(); String oldName = alterTbl.getOldColName(); @@ -3499,6 +3501,7 @@ sd.setCols(newCols); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.REPLACECOLS) { + StorageDescriptor sd = (part == null ? 
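compareKey() above caches one WritableComparator per key position and treats a NULL/NULL key pair as a match only when the join key is null-safe. A hedged standalone sketch of the same logic, assuming Hadoop's IntWritable and WritableComparator are on the classpath; the demo class is illustrative.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class KeyCompareDemo {
  // One comparator slot per key position, filled lazily and then reused.
  private final WritableComparator[] comparators = new WritableComparator[1];

  @SuppressWarnings({"rawtypes", "unchecked"})
  int compareKey(int pos, WritableComparable k1, WritableComparable k2, boolean nullSafe) {
    if (k1 == null && k2 == null) {
      return nullSafe ? 0 : -1;   // two NULLs only match under a null-safe (<=>) join key
    } else if (k1 == null) {
      return -1;
    } else if (k2 == null) {
      return 1;
    }
    if (comparators[pos] == null) {
      comparators[pos] = WritableComparator.get(k1.getClass()); // cached after first use
    }
    return comparators[pos].compare(k1, k2);
  }

  public static void main(String[] args) {
    KeyCompareDemo demo = new KeyCompareDemo();
    System.out.println(demo.compareKey(0, new IntWritable(1), new IntWritable(2), false)); // negative
    System.out.println(demo.compareKey(0, null, null, true));                              // 0
  }
}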
tbl.getTTable().getSd() : part.getTPartition().getSd()); // change SerDe to LazySimpleSerDe if it is columnsetSerDe String serializationLib = sd.getSerdeInfo().getSerializationLib(); if (serializationLib.equals( @@ -3523,8 +3526,10 @@ tbl.getTTable().getParameters().remove(keyItr.next()); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDEPROPS) { + StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps()); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDE) { + StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); String serdeName = alterTbl.getSerdeName(); sd.getSerdeInfo().setSerializationLib(serdeName); if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) { @@ -3539,6 +3544,7 @@ } } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDFILEFORMAT) { + StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); sd.setInputFormat(alterTbl.getInputFormat()); sd.setOutputFormat(alterTbl.getOutputFormat()); if (alterTbl.getSerdeName() != null) { @@ -3559,6 +3565,7 @@ tbl.setProtectMode(mode); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCLUSTERSORTCOLUMN) { + StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); // validate sort columns and bucket columns List columns = Utilities.getColumnNamesFromFieldSchema(tbl .getCols()); @@ -3583,6 +3590,7 @@ sd.setSortCols(alterTbl.getSortColumns()); } } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) { + StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); String newLocation = alterTbl.getNewLocation(); try { URI locUri = new URI(newLocation); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (working copy) @@ -40,6 +40,8 @@ import java.util.TreeMap; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.jsonexplain.JsonParser; +import org.apache.hadoop.hive.common.jsonexplain.JsonParserFactory; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.DriverContext; @@ -47,7 +49,9 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.Explain; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.ExplainWork; import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -284,10 +288,30 @@ JSONObject jsonDependencies = getJSONDependencies(work); out.print(jsonDependencies); } else { - JSONObject jsonPlan = getJSONPlan(out, work); - if (work.isFormatted()) { - out.print(jsonPlan); - } + if (work.getDependency()) { + JSONObject jsonDependencies = getJSONDependencies(work); + out.print(jsonDependencies); + } else { + if (work.isUserLevelExplain()) { + JsonParser jsonParser = JsonParserFactory.getParser(conf); + if (jsonParser != null) { + 
work.setFormatted(true); + JSONObject jsonPlan = getJSONPlan(out, work); + if (work.getCboInfo() != null) { + jsonPlan.put("cboInfo", work.getCboInfo()); + } + jsonParser.print(jsonPlan, out); + } else { + throw new SemanticException( + "Hive UserLevelExplain only supports tez engine right now."); + } + } else { + JSONObject jsonPlan = getJSONPlan(out, work); + if (work.isFormatted()) { + out.print(jsonPlan); + } + } + } } out.close(); @@ -561,7 +585,17 @@ if (note instanceof Explain) { Explain xpl_note = (Explain) note; - if (extended || xpl_note.normalExplain()) { + boolean invokeFlag = false; + if (this.work.isUserLevelExplain()) { + invokeFlag = Level.USER.in(xpl_note.explainLevels()); + } else { + if (extended) { + invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels()); + } else { + invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels()); + } + } + if (invokeFlag) { keyJSONObject = xpl_note.displayName(); if (out != null) { out.print(indentString(indent)); @@ -584,6 +618,12 @@ String appender = isLogical ? " (" + operator.getOperatorId() + ")" : ""; JSONObject jsonOut = outputPlan(operator.getConf(), out, extended, jsonOutput, jsonOutput ? 0 : indent, appender); + if (this.work.isUserLevelExplain()) { + if (jsonOut != null && jsonOut.length() > 0) { + ((JSONObject) jsonOut.get(JSONObject.getNames(jsonOut)[0])).put("OperatorId:", + operator.getOperatorId()); + } + } if (jsonOutput) { json = jsonOut; } @@ -618,9 +658,18 @@ if (note instanceof Explain) { Explain xpl_note = (Explain) note; + boolean invokeFlag = false; + if (this.work.isUserLevelExplain()) { + invokeFlag = Level.USER.in(xpl_note.explainLevels()); + } else { + if (extended) { + invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels()); + } else { + invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels()); + } + } + if (invokeFlag) { - if (extended || xpl_note.normalExplain()) { - Object val = null; try { val = m.invoke(work); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -273,6 +273,7 @@ system.registerGenericUDF("date_sub", GenericUDFDateSub.class); system.registerGenericUDF("datediff", GenericUDFDateDiff.class); system.registerGenericUDF("add_months", GenericUDFAddMonths.class); + system.registerGenericUDF("months_between", GenericUDFMonthsBetween.class); system.registerUDF("get_json_object", UDFJson.class, false); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (working copy) @@ -498,26 +498,26 @@ private void reloadHashTable(HashPartition partition, HybridHashTableContainer hybridHtContainer) throws IOException, ClassNotFoundException, HiveException, SerDeException { - // Deserialize the on-disk hash table - // We're sure this part is smaller than memory limit - BytesBytesMultiHashMap restoredHashMap = partition.getHashMapFromDisk(); - int rowCount = restoredHashMap.getNumValues(); - LOG.info("Hybrid Grace Hash Join: Deserializing spilled hash partition..."); - LOG.info("Hybrid Grace Hash Join: Number of rows restored from hashmap: " + rowCount); // Merge the sidefile into the newly created hash table // This is 
where the spilling may happen again KeyValueContainer kvContainer = partition.getSidefileKVContainer(); - rowCount += kvContainer.size(); + int rowCount = kvContainer.size(); LOG.info("Hybrid Grace Hash Join: Number of rows restored from KeyValueContainer: " + kvContainer.size()); + // Deserialize the on-disk hash table + // We're sure this part is smaller than memory limit + BytesBytesMultiHashMap restoredHashMap = partition.getHashMapFromDisk(rowCount); + rowCount += restoredHashMap.getNumValues(); + LOG.info("Hybrid Grace Hash Join: Deserializing spilled hash partition..."); + LOG.info("Hybrid Grace Hash Join: Number of rows in hashmap: " + rowCount); + // If based on the new key count, keyCount is smaller than a threshold, // then just load the entire restored hashmap into memory. // The size of deserialized partition shouldn't exceed half of memory limit if (rowCount * hybridHtContainer.getTableRowSize() >= hybridHtContainer.getMemoryThreshold() / 2) { - throw new RuntimeException("Hybrid Grace Hash Join: Hash table cannot be reloaded since it" + - " will be greater than memory limit. Recursive spilling is currently not supported"); + LOG.info("Hybrid Grace Hash Join: Hash table reload can fail since it will be greater than memory limit. Recursive spilling is currently not supported"); } KeyValueHelper writeHelper = hybridHtContainer.getWriteHelper(); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (working copy) @@ -637,11 +637,6 @@ return null; } - @Override - public Map getTagToOperatorTree() { - return MapRecordProcessor.getConnectOps(); - } - public void initializeContexts() { Path fpath = getExecContext().getCurrentInputPath(); String nominalPath = getNominalPath(fpath); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (working copy) @@ -1350,12 +1350,4 @@ return childOperators; } } - - public Map getTagToOperatorTree() { - if ((parentOperators == null) || (parentOperators.size() == 0)) { - return null; - } - Map dummyOps = parentOperators.get(0).getTagToOperatorTree(); - return dummyOps; - } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (working copy) @@ -342,7 +342,7 @@ joinOneGroup(); dataInCache = false; for (byte pos = 0; pos < order.length; pos++) { - if (this.candidateStorage[pos].rowCount() > 0) { + if (this.candidateStorage[pos].hasRows()) { dataInCache = true; break; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java (working copy) @@ -45,14 +45,14 @@ // metastore tokens into a file String uname = UserGroupInformation.getLoginUser().getShortUserName(); FileSystem fs = FileSystem.get(conf); - Token fsToken = 
fs.getDelegationToken(uname); + Credentials cred = new Credentials(); + // Use method addDelegationTokens instead of getDelegationToken to get all the tokens including KMS. + fs.addDelegationTokens(uname, cred); tokenFile = File.createTempFile("hive_hadoop_delegation_token", null); tokenPath = new Path(tokenFile.toURI()); //write credential with token to file - Credentials cred = new Credentials(); - cred.addToken(fsToken.getService(), fsToken); cred.writeTokenStorageFile(tokenPath, conf); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (working copy) @@ -187,6 +187,7 @@ this.childTasks = childTasks; } + @Override public List getChildren() { return getChildTasks(); } @@ -521,7 +522,7 @@ return exception; } - void setException(Throwable ex) { + protected void setException(Throwable ex) { exception = ex; } @@ -542,10 +543,12 @@ return getId() + ":" + getType(); } + @Override public int hashCode() { return toString().hashCode(); } + @Override public boolean equals(Object obj) { return toString().equals(String.valueOf(obj)); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (working copy) @@ -46,6 +46,7 @@ import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.SQLException; +import java.sql.SQLFeatureNotSupportedException; import java.sql.SQLTransientException; import java.sql.Timestamp; import java.text.SimpleDateFormat; @@ -211,6 +212,8 @@ public static final String MAPRED_MAPPER_CLASS = "mapred.mapper.class"; public static final String MAPRED_REDUCER_CLASS = "mapred.reducer.class"; public static final String HIVE_ADDED_JARS = "hive.added.jars"; + public static String MAPNAME = "Map "; + public static String REDUCENAME = "Reducer "; /** * ReduceField: @@ -242,6 +245,7 @@ private static ThreadLocal> gWorkMap = new ThreadLocal>() { + @Override protected Map initialValue() { return new HashMap(); } @@ -307,12 +311,13 @@ public static Path setMergeWork(JobConf conf, MergeJoinWork mergeJoinWork, Path mrScratchDir, boolean useCache) { for (BaseWork baseWork : mergeJoinWork.getBaseWorkList()) { - setBaseWork(conf, baseWork, mrScratchDir, baseWork.getName() + MERGE_PLAN_NAME, useCache); + String prefix = baseWork.getName(); + setBaseWork(conf, baseWork, mrScratchDir, prefix + MERGE_PLAN_NAME, useCache); String prefixes = conf.get(DagUtils.TEZ_MERGE_WORK_FILE_PREFIXES); if (prefixes == null) { - prefixes = baseWork.getName(); + prefixes = prefix; } else { - prefixes = prefixes + "," + baseWork.getName(); + prefixes = prefixes + "," + prefix; } conf.set(DagUtils.TEZ_MERGE_WORK_FILE_PREFIXES, prefixes); } @@ -432,7 +437,13 @@ + MAPRED_REDUCER_CLASS +" was "+ conf.get(MAPRED_REDUCER_CLASS)) ; } } else if (name.contains(MERGE_PLAN_NAME)) { - gWork = deserializePlan(in, MapWork.class, conf); + if (name.startsWith(MAPNAME)) { + gWork = deserializePlan(in, MapWork.class, conf); + } else if (name.startsWith(REDUCENAME)) { + gWork = deserializePlan(in, ReduceWork.class, conf); + } else { + throw new RuntimeException("Unknown work type: " + name); + } } gWorkMap.get().put(path, gWork); } else if (LOG.isDebugEnabled()) { @@ -457,9 +468,9 @@ } } - 
public static Map> getMapWorkAllScratchColumnVectorTypeMaps(Configuration hiveConf) { + public static Map getMapWorkVectorScratchColumnTypeMap(Configuration hiveConf) { MapWork mapWork = getMapWork(hiveConf); - return mapWork.getAllScratchColumnVectorTypeMaps(); + return mapWork.getVectorScratchColumnTypeMap(); } public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) { @@ -3104,6 +3115,24 @@ } } + public static void setQueryTimeout(java.sql.Statement stmt, int timeout) throws SQLException { + if (timeout < 0) { + LOG.info("Invalid query timeout " + timeout); + return; + } + try { + stmt.setQueryTimeout(timeout); + } catch (SQLException e) { + String message = e.getMessage() == null ? null : e.getMessage().toLowerCase(); + if (e instanceof SQLFeatureNotSupportedException || + (message != null && (message.contains("implemented") || message.contains("supported")))) { + LOG.info("setQueryTimeout is not supported"); + return; + } + throw e; + } + } + /** * Introducing a random factor to the wait time before another retry. * The wait time is dependent on # of failures and a random factor. Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractRowContainer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractRowContainer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractRowContainer.java (working copy) @@ -37,6 +37,17 @@ public void addRow(ROW t) throws HiveException; /** + * @return whether the row container has at least 1 row. + * NOTE: Originally we named this isEmpty, but that name conflicted with another interface. + */ + public boolean hasRows() throws HiveException; + + /** + * @return whether the row container has 1 row. + */ + public boolean isSingleRow() throws HiveException; + + /** * @return number of elements in the RowContainer */ public int rowCount() throws HiveException; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java (working copy) @@ -146,7 +146,7 @@ private long[] refs; private int startingHashBitCount, hashBitCount; - private int metricPutConflict = 0, metricExpands = 0, metricExpandsUs = 0; + private int metricPutConflict = 0, metricGetConflict = 0, metricExpands = 0, metricExpandsMs = 0; /** We have 39 bits to store list pointer from the first record; this is size limit */ final static long MAX_WB_SIZE = ((long)1) << 38; @@ -184,6 +184,240 @@ this(initialCapacity, loadFactor, wbSize, -1); } + public class ThreadSafeGetter { + private WriteBuffers.Position position = new WriteBuffers.Position(); + public byte getValueResult(byte[] key, int offset, int length, + BytesBytesMultiHashMap.Result hashMapResult) { + return BytesBytesMultiHashMap.this.getValueResult(key, offset, length, hashMapResult, position); + } + + public void populateValue(WriteBuffers.ByteSegmentRef valueRef) { + // Convenience method, populateValue is thread-safe. + BytesBytesMultiHashMap.this.populateValue(valueRef); + } + } + + /** + * The result of looking up a key in the multi-hash map. + * + * This object can read through the 0, 1, or more values found for the key. + */ + public static class Result { + + // Whether there are more than 0 rows. 
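A note on the Utilities.setQueryTimeout helper added above: it makes the timeout best-effort, because some JDBC drivers throw when Statement.setQueryTimeout is not implemented; the patch additionally falls back to inspecting the exception message for drivers that signal this with a plain SQLException. A minimal standalone sketch of the same defensive pattern follows; the class name, JDBC URL, and query text are illustrative only and not part of the patch.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.PreparedStatement;
    import java.sql.SQLException;
    import java.sql.SQLFeatureNotSupportedException;
    import java.sql.Statement;

    public class QueryTimeoutSketch {

      // Best-effort timeout: drivers that do not implement the feature are simply skipped.
      static void setQueryTimeoutIfSupported(Statement stmt, int seconds) throws SQLException {
        if (seconds < 0) {
          return; // invalid value; keep the driver default
        }
        try {
          stmt.setQueryTimeout(seconds);
        } catch (SQLFeatureNotSupportedException e) {
          // Unsupported by this driver; proceed without a timeout.
        }
      }

      public static void main(String[] args) throws SQLException {
        // The JDBC URL (args[0]) and the query text are placeholders only.
        try (Connection conn = DriverManager.getConnection(args[0]);
             PreparedStatement ps = conn.prepareStatement("SELECT 1")) {
          setQueryTimeoutIfSupported(ps, 30);
          ps.execute();
        }
      }
    }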
+ private boolean hasRows; + + // We need a pointer to the hash map since this class must be static to support having + // multiple hash tables with Hybrid Grace partitioning. + private BytesBytesMultiHashMap hashMap; + + // And, a mutable read position for thread safety when sharing a hash map. + private WriteBuffers.Position readPos; + + // These values come from setValueResult when it finds a key. These values allow this + // class to read (and re-read) the values. + private long firstOffset; + private boolean hasList; + private long offsetAfterListRecordKeyLen; + + // When we have multiple values, we save the next value record's offset here. + private long nextTailOffset; + + // 0-based index of which row we are on. + private long readIndex; + + // A reference to the current row. + private WriteBuffers.ByteSegmentRef byteSegmentRef; + + public Result() { + hasRows = false; + byteSegmentRef = new WriteBuffers.ByteSegmentRef(); + } + + /** + * @return Whether there are 1 or more values. + */ + public boolean hasRows() { + // NOTE: Originally we named this isEmpty, but that name conflicted with another interface. + return hasRows; + } + + /** + * @return Whether there is just 1 value row. + */ + public boolean isSingleRow() { + return !hasList; + } + + /** + * Set internal values for reading the values after finding a key. + * + * @param hashMap + * The hash map we found the key in. + * @param firstOffset + * The absolute offset of the first record in the write buffers. + * @param hasList + * Whether there are multiple values (true) or just a single value (false). + * @param offsetAfterListRecordKeyLen + * The offset of just after the key length in the list record. Or, 0 when single row. + * @param readPos + * Holds mutable read position for thread safety. + */ + public void set(BytesBytesMultiHashMap hashMap, long firstOffset, boolean hasList, + long offsetAfterListRecordKeyLen, WriteBuffers.Position readPos) { + + this.hashMap = hashMap; + this.readPos = readPos; + + this.firstOffset = firstOffset; + this.hasList = hasList; + this.offsetAfterListRecordKeyLen = offsetAfterListRecordKeyLen; + + // Position at first row. + readIndex = 0; + nextTailOffset = -1; + + hasRows = true; + } + + public WriteBuffers.ByteSegmentRef first() { + if (!hasRows) { + return null; + } + + // Position at first row. + readIndex = 0; + nextTailOffset = -1; + + return internalRead(); + } + + public WriteBuffers.ByteSegmentRef next() { + if (!hasRows) { + return null; + } + + return internalRead(); + } + + /** + * Read the current value. + * + * @return + * The ByteSegmentRef to the current value read. + */ + private WriteBuffers.ByteSegmentRef internalRead() { + + if (!hasList) { + + /* + * Single value. + */ + + if (readIndex > 0) { + return null; + } + + // For a non-list (i.e. single value), the offset is for the variable length long (VLong) + // holding the value length (followed by the key length). + hashMap.writeBuffers.setReadPoint(firstOffset, readPos); + int valueLength = (int) hashMap.writeBuffers.readVLong(readPos); + + // The value is before the offset. Make byte segment reference absolute. + byteSegmentRef.reset(firstOffset - valueLength, valueLength); + hashMap.writeBuffers.populateValue(byteSegmentRef); + + readIndex++; + return byteSegmentRef; + } + + /* + * Multiple values. + */ + + if (readIndex == 0) { + // For a list, the value and key lengths of 1st record were overwritten with the + // relative offset to a new list record. 
+ long relativeOffset = hashMap.writeBuffers.readNByteLong(firstOffset, 5, readPos); + + // At the beginning of the list record will be the value length. + hashMap.writeBuffers.setReadPoint(firstOffset + relativeOffset, readPos); + int valueLength = (int) hashMap.writeBuffers.readVLong(readPos); + + // The value is before the list record offset. Make byte segment reference absolute. + byteSegmentRef.reset(firstOffset - valueLength, valueLength); + hashMap.writeBuffers.populateValue(byteSegmentRef); + + readIndex++; + return byteSegmentRef; + } + + if (readIndex == 1) { + // We remembered the offset of just after the key length in the list record. + // Read the absolute offset to the 2nd value. + nextTailOffset = hashMap.writeBuffers.readNByteLong(offsetAfterListRecordKeyLen, 5, readPos); + if (nextTailOffset <= 0) { + throw new Error("Expecting a second value"); + } + } else if (nextTailOffset <= 0) { + return null; + } + + hashMap.writeBuffers.setReadPoint(nextTailOffset, readPos); + + // Get the value length. + int valueLength = (int) hashMap.writeBuffers.readVLong(readPos); + + // Now read the relative offset to next record. Next record is always before the + // previous record in the write buffers (see writeBuffers javadoc). + long delta = hashMap.writeBuffers.readVLong(readPos); + long newTailOffset = delta == 0 ? 0 : (nextTailOffset - delta); + + // The value is before the value record offset. Make byte segment reference absolute. + byteSegmentRef.reset(nextTailOffset - valueLength, valueLength); + hashMap.writeBuffers.populateValue(byteSegmentRef); + + nextTailOffset = newTailOffset; + readIndex++; + return byteSegmentRef; + } + + /** + * @return Whether we have read all the values or not. + */ + public boolean isEof() { + // LOG.info("BytesBytesMultiHashMap isEof hasRows " + hasRows + " hasList " + hasList + " readIndex " + readIndex + " nextTailOffset " + nextTailOffset); + if (!hasRows) { + return true; + } + + if (!hasList) { + return (readIndex > 0); + } else { + // Multiple values. + if (readIndex <= 1) { + // Careful: We have not read the list record and 2nd value yet, so nextTailOffset + // is not valid yet. + return false; + } else { + return (nextTailOffset <= 0); + } + } + } + + /** + * Lets go of any references to a hash map. + */ + public void forget() { + hashMap = null; + readPos = null; + byteSegmentRef.reset(0, 0); + hasRows = false; + readIndex = 0; + nextTailOffset = -1; + } + } + /** The source of keys and values to put into hashtable; avoids byte copying. */ public static interface KvSource { /** Write key into output. */ @@ -201,7 +435,7 @@ } /** - * Adds new value to new or existing key in hashmap. + * Adds new value to new or existing key in hashmap. Not thread-safe. * @param kv Keyvalue writer. Each method will be called at most once. */ private static final byte[] FOUR_ZEROES = new byte[] { 0, 0, 0, 0 }; @@ -247,53 +481,46 @@ ++numValues; } + public ThreadSafeGetter createGetterForThread() { + return new ThreadSafeGetter(); + } + + /** Not thread-safe! Use createGetterForThread. */ + public byte getValueResult(byte[] key, int offset, int length, Result hashMapResult) { + return getValueResult(key, offset, length, hashMapResult, writeBuffers.getReadPosition()); + } + /** - * Gets "lazy" values for a key (as a set of byte segments in underlying buffer). + * Finds a key. Values can be read with the supplied result object. + * * @param key Key buffer. - * @param length Length of the key in buffer. - * @param result The list to use to store the results. 
- * @return the state byte for the key (see class description). + * @param offset the offset to the key in the buffer + * @param hashMapResult The object to fill in that can read the values. + * @param readPos Holds mutable read position for thread safety. + * @return The state byte. */ - public byte getValueRefs(byte[] key, int length, List result) { + private byte getValueResult(byte[] key, int offset, int length, Result hashMapResult, + WriteBuffers.Position readPos) { + + hashMapResult.forget(); + // First, find first record for the key. - result.clear(); - long ref = findKeyRefToRead(key, length); + long ref = findKeyRefToRead(key, offset, length, readPos); if (ref == 0) { return 0; } + boolean hasList = Ref.hasList(ref); // This relies on findKeyRefToRead doing key equality check and leaving read ptr where needed. - long lrPtrOffset = hasList ? writeBuffers.getReadPoint() : 0; + long offsetAfterListRecordKeyLen = hasList ? writeBuffers.getReadPoint(readPos) : 0; - writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref)); - int valueLength = (int)writeBuffers.readVLong(); - // LOG.info("Returning value at " + (Ref.getOffset(ref) - valueLength) + " length " + valueLength); - result.add(new WriteBuffers.ByteSegmentRef(Ref.getOffset(ref) - valueLength, valueLength)); - byte stateByte = Ref.getStateByte(ref); - if (!hasList) { - return stateByte; - } + hashMapResult.set(this, Ref.getOffset(ref), hasList, offsetAfterListRecordKeyLen, + readPos); - // There're multiple records for the key; get the offset of the next one. - long nextTailOffset = writeBuffers.readFiveByteULong(lrPtrOffset); - // LOG.info("Next tail offset " + nextTailOffset); - - while (nextTailOffset > 0) { - writeBuffers.setReadPoint(nextTailOffset); - valueLength = (int)writeBuffers.readVLong(); - // LOG.info("Returning value at " + (nextTailOffset - valueLength) + " length " + valueLength); - result.add(new WriteBuffers.ByteSegmentRef(nextTailOffset - valueLength, valueLength)); - // Now read the relative offset to next record. Next record is always before the - // previous record in the write buffers (see writeBuffers javadoc). - long delta = writeBuffers.readVLong(); - nextTailOffset = delta == 0 ? 0 : (nextTailOffset - delta); - // LOG.info("Delta " + delta + ", next tail offset " + nextTailOffset); - } - return stateByte; + return Ref.getStateByte(ref); } - /** * Take the segment reference from {@link #getValueRefs(byte[], int, List)} * result and makes it self-contained - adds byte array where the value is stored, and @@ -341,6 +568,17 @@ this.keysAssigned = 0; } + public void expandAndRehashToTarget(int estimateNewRowCount) { + int oldRefsCount = refs.length; + int newRefsCount = oldRefsCount + estimateNewRowCount; + if (resizeThreshold <= newRefsCount) { + newRefsCount = + (Long.bitCount(newRefsCount) == 1) ? estimateNewRowCount : nextHighestPowerOfTwo(newRefsCount); + expandAndRehashImpl(newRefsCount); + LOG.info("Expand and rehash to " + newRefsCount + " from " + oldRefsCount); + } + } + private static void validateCapacity(long capacity) { if (Long.bitCount(capacity) != 1) { throw new AssertionError("Capacity must be a power of two"); @@ -388,9 +626,10 @@ * @param length Read key length. * @return The ref to use for reading. 
*/ - private long findKeyRefToRead(byte[] key, int length) { + private long findKeyRefToRead(byte[] key, int offset, int length, + WriteBuffers.Position readPos) { final int bucketMask = (refs.length - 1); - int hashCode = writeBuffers.hashCode(key, 0, length); + int hashCode = writeBuffers.hashCode(key, offset, length); int slot = hashCode & bucketMask; // LOG.info("Read hash code for " + Utils.toStringBinary(key, 0, length) // + " is " + Integer.toBinaryString(hashCode) + " - " + slot); @@ -402,9 +641,10 @@ if (ref == 0) { return 0; } - if (isSameKey(key, length, ref, hashCode)) { + if (isSameKey(key, offset, length, ref, hashCode, readPos)) { return ref; } + ++metricGetConflict; probeSlot += (++i); if (i > largestNumberOfSteps) { // We know we never went that far when we were inserting. @@ -453,7 +693,7 @@ if (!compareHashBits(ref, hashCode)) { return false; // Hash bits in ref don't match. } - writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref)); + writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, null)); int valueLength = (int)writeBuffers.readVLong(), keyLength = (int)writeBuffers.readVLong(); if (keyLength != cmpLength) { return false; @@ -471,15 +711,21 @@ /** * Same as {@link #isSameKey(long, int, long, int)} but for externally stored key. */ - private boolean isSameKey(byte[] key, int length, long ref, int hashCode) { + private boolean isSameKey(byte[] key, int offset, int length, long ref, int hashCode, + WriteBuffers.Position readPos) { if (!compareHashBits(ref, hashCode)) { return false; // Hash bits don't match. } - writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref)); - int valueLength = (int)writeBuffers.readVLong(), keyLength = (int)writeBuffers.readVLong(); + writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, readPos), readPos); + int valueLength = (int)writeBuffers.readVLong(readPos), + keyLength = (int)writeBuffers.readVLong(readPos); long keyOffset = Ref.getOffset(ref) - (valueLength + keyLength); // See the comment in the other isSameKey - return writeBuffers.isEqual(key, length, keyOffset, keyLength); + if (offset == 0) { + return writeBuffers.isEqual(key, length, keyOffset, keyLength); + } else { + return writeBuffers.isEqual(key, offset, length, keyOffset, keyLength); + } } private boolean compareHashBits(long ref, int hashCode) { @@ -491,19 +737,24 @@ * @param ref Reference. * @return The offset to value and key length vlongs of the first record referenced by ref. */ - private long getFirstRecordLengthsOffset(long ref) { + private long getFirstRecordLengthsOffset(long ref, WriteBuffers.Position readPos) { long tailOffset = Ref.getOffset(ref); if (Ref.hasList(ref)) { - long relativeOffset = writeBuffers.readFiveByteULong(tailOffset); + long relativeOffset = (readPos == null) ? writeBuffers.readNByteLong(tailOffset, 5) + : writeBuffers.readNByteLong(tailOffset, 5, readPos); tailOffset += relativeOffset; } return tailOffset; } private void expandAndRehash() { - long expandTime = System.nanoTime(); + long capacity = refs.length << 1; + expandAndRehashImpl(capacity); + } + + private void expandAndRehashImpl(long capacity) { + long expandTime = System.currentTimeMillis(); final long[] oldRefs = refs; - long capacity = refs.length << 1; validateCapacity(capacity); long[] newRefs = new long[(int)capacity]; @@ -522,10 +773,10 @@ // TODO: we could actually store a bit flag in ref indicating whether this is a hash // match or a probe, and in the former case use hash bits (for a first few resizes). 
// int hashCodeOrPart = oldSlot | Ref.getNthHashBit(oldRef, startingHashBitCount, newHashBitCount); - writeBuffers.setReadPoint(getFirstRecordLengthsOffset(oldRef)); + writeBuffers.setReadPoint(getFirstRecordLengthsOffset(oldRef, null)); // Read the value and key length for the first record. - int hashCode = writeBuffers.readInt(Ref.getOffset(oldRef) - - writeBuffers.readVLong() - writeBuffers.readVLong() - 4); + int hashCode = (int)writeBuffers.readNByteLong(Ref.getOffset(oldRef) + - writeBuffers.readVLong() - writeBuffers.readVLong() - 4, 4); int probeSteps = relocateKeyRef(newRefs, oldRef, hashCode); maxSteps = Math.max(probeSteps, maxSteps); } @@ -533,9 +784,8 @@ this.largestNumberOfSteps = maxSteps; this.hashBitCount = newHashBitCount; this.resizeThreshold = (int)(capacity * loadFactor); - metricExpandsUs += (System.nanoTime() - expandTime); + metricExpandsMs += (System.currentTimeMillis() - expandTime); ++metricExpands; - } /** @@ -576,7 +826,7 @@ */ private void addRecordToList(long lrPtrOffset, long tailOffset) { // Now, insert this record into the list. - long prevHeadOffset = writeBuffers.readFiveByteULong(lrPtrOffset); + long prevHeadOffset = writeBuffers.readNByteLong(lrPtrOffset, 5); // LOG.info("Reading offset " + prevHeadOffset + " at " + lrPtrOffset); assert prevHeadOffset < tailOffset; // We replace an earlier element, must have lower offset. writeBuffers.writeFiveByteULong(lrPtrOffset, tailOffset); @@ -632,11 +882,10 @@ return tailOffset; } - /** Writes the debug dump of the table into logs. */ + /** Writes the debug dump of the table into logs. Not thread-safe. */ public void debugDumpTable() { StringBuilder dump = new StringBuilder(keysAssigned + " keys\n"); TreeMap byteIntervals = new TreeMap(); - List results = new ArrayList(); int examined = 0; for (int slot = 0; slot < refs.length; ++slot) { long ref = refs[slot]; @@ -644,10 +893,11 @@ continue; } ++examined; - long recOffset = getFirstRecordLengthsOffset(ref); + long recOffset = getFirstRecordLengthsOffset(ref, null); long tailOffset = Ref.getOffset(ref); writeBuffers.setReadPoint(recOffset); - int valueLength = (int)writeBuffers.readVLong(), keyLength = (int)writeBuffers.readVLong(); + int valueLength = (int)writeBuffers.readVLong(), + keyLength = (int)writeBuffers.readVLong(); long ptrOffset = writeBuffers.getReadPoint(); if (Ref.hasList(ref)) { byteIntervals.put(recOffset, (int)(ptrOffset + 5 - recOffset)); @@ -658,9 +908,17 @@ byteIntervals.put(keyOffset - 4, keyLength + 4); writeBuffers.populateValue(fakeRef); System.arraycopy(fakeRef.getBytes(), (int)fakeRef.getOffset(), key, 0, keyLength); - getValueRefs(key, key.length, results); dump.append(Utils.toStringBinary(key, 0, key.length)).append(" ref [").append(dumpRef(ref)) - .append("]: ").append(results.size()).append(" rows\n"); + .append("]: "); + Result hashMapResult = new Result(); + getValueResult(key, 0, key.length, hashMapResult); + List results = new ArrayList(); + WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + results.add(hashMapResult.byteSegmentRef); + byteSegmentRef = hashMapResult.next(); + } + dump.append(results.size()).append(" rows\n"); for (int i = 0; i < results.size(); ++i) { WriteBuffers.ByteSegmentRef segment = results.get(i); byteIntervals.put(segment.getOffset(), @@ -753,7 +1011,8 @@ public void debugDumpMetrics() { LOG.info("Map metrics: keys allocated " + this.refs.length +", keys assigned " + keysAssigned + ", write conflict " + metricPutConflict + ", write max dist " + 
largestNumberOfSteps - + ", expanded " + metricExpands + " times in " + metricExpandsUs + "us"); + + ", read conflict " + metricGetConflict + + ", expanded " + metricExpands + " times in " + metricExpandsMs + "ms"); } private void debugDumpKeyProbe(long keyOffset, int keyLength, int hashCode, int finalSlot) { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/FlatRowContainer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/FlatRowContainer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/FlatRowContainer.java (working copy) @@ -208,6 +208,16 @@ } @Override + public boolean hasRows() throws HiveException { + return rowCount() > 0; + } + + @Override + public boolean isSingleRow() throws HiveException { + return rowCount() == 1; + } + + @Override public int rowCount() throws HiveException { return rowLength > 0 ? (array.length / rowLength) : -rowLength; // see rowLength javadoc } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java (working copy) @@ -20,6 +20,7 @@ import com.esotericsoftware.kryo.Kryo; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -123,15 +124,20 @@ /* Restore the hashmap from disk by deserializing it. * Currently Kryo is used for this purpose. */ - public BytesBytesMultiHashMap getHashMapFromDisk() + public BytesBytesMultiHashMap getHashMapFromDisk(int initialCapacity) throws IOException, ClassNotFoundException { if (hashMapSpilledOnCreation) { - return new BytesBytesMultiHashMap(threshold, loadFactor, wbSize, -1); + return new BytesBytesMultiHashMap(Math.max(threshold, initialCapacity) , loadFactor, wbSize, -1); } else { InputStream inputStream = Files.newInputStream(hashMapLocalPath); com.esotericsoftware.kryo.io.Input input = new com.esotericsoftware.kryo.io.Input(inputStream); Kryo kryo = Utilities.runtimeSerializationKryo.get(); BytesBytesMultiHashMap restoredHashMap = kryo.readObject(input, BytesBytesMultiHashMap.class); + + if (initialCapacity > 0) { + restoredHashMap.expandAndRehashToTarget(initialCapacity); + } + input.close(); inputStream.close(); Files.delete(hashMapLocalPath); @@ -163,7 +169,8 @@ public HybridHashTableContainer(Configuration hconf, long keyCount, long memUsage, long tableSize) throws SerDeException { - this(HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD), + this(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT), + HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD), HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE), HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD), @@ -171,22 +178,27 @@ tableSize, keyCount, memUsage); } - private HybridHashTableContainer(int threshold, float loadFactor, int wbSize, + private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFactor, int wbSize, long noConditionalTaskThreshold, int memCheckFreq, long tableSize, long keyCount, long memUsage) throws SerDeException { + + int newKeyCount = 
HashMapWrapper.calculateTableSize( + keyCountAdj, threshold, loadFactor, keyCount); + memoryThreshold = noConditionalTaskThreshold; - tableRowSize = tableSize / keyCount; + tableRowSize = tableSize / newKeyCount; memoryCheckFrequency = memCheckFreq; int numPartitions = calcNumPartitions(tableSize, wbSize); // estimate # of partitions to create hashPartitions = new HashPartition[numPartitions]; int numPartitionsSpilledOnCreation = 0; long memoryAllocated = 0; + int initialCapacity = Math.max(newKeyCount / numPartitions, threshold / numPartitions); for (int i = 0; i < numPartitions; i++) { if (i == 0) { // We unconditionally create a hashmap for the first hash partition - hashPartitions[i] = new HashPartition(threshold, loadFactor, wbSize, memUsage, true); + hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, wbSize, memUsage, true); } else { - hashPartitions[i] = new HashPartition(threshold, loadFactor, wbSize, memUsage, + hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, wbSize, memUsage, memoryAllocated + wbSize < memoryThreshold); } if (isHashMapSpilledOnCreation(i)) { @@ -555,7 +567,7 @@ @Override public MapJoinRowContainer getCurrentRows() { - return currentValue.isEmpty() ? null : currentValue; + return !currentValue.hasRows() ? null : currentValue; } @Override @@ -568,8 +580,8 @@ private class ReusableRowContainer implements MapJoinRowContainer, AbstractRowContainer.RowIterator> { private byte aliasFilter; - private List refs; - private int currentRow; + private BytesBytesMultiHashMap.Result hashMapResult; + /** * Sometimes, when container is empty in multi-table mapjoin, we need to add a dummy row. * This container does not normally support adding rows; this is for the dummy row. @@ -589,6 +601,7 @@ valueStruct = null; // No rows? } uselessIndirection = new ByteArrayRef(); + hashMapResult = new BytesBytesMultiHashMap.Result(); clearRows(); } @@ -600,57 +613,58 @@ * the evaluation for this big table row will be postponed. */ public JoinUtil.JoinResult setFromOutput(Output output) throws HiveException { - if (refs == null) { - refs = new ArrayList(0); - } - int keyHash = WriteBuffers.murmurHash(output.getData(), 0, output.getLength()); partitionId = keyHash & (hashPartitions.length - 1); // If the target hash table is on disk, spill this row to disk as well to be processed later if (isOnDisk(partitionId)) { toSpillPartitionId = partitionId; - refs.clear(); + hashMapResult.forget(); return JoinUtil.JoinResult.SPILL; } else { - byte aliasFilter = hashPartitions[partitionId].hashMap.getValueRefs( - output.getData(), output.getLength(), refs); - this.aliasFilter = refs.isEmpty() ? 
(byte) 0xff : aliasFilter; - this.dummyRow = null; - if (refs.isEmpty()) { + aliasFilter = hashPartitions[partitionId].hashMap.getValueResult(output.getData(), 0, output.getLength(), hashMapResult); + dummyRow = null; + if (hashMapResult.hasRows()) { + return JoinUtil.JoinResult.MATCH; + } else { + aliasFilter = (byte) 0xff; return JoinUtil.JoinResult.NOMATCH; } - else { - return JoinUtil.JoinResult.MATCH; - } } } - public boolean isEmpty() { - return refs.isEmpty() && (dummyRow == null); + @Override + public boolean hasRows() { + return hashMapResult.hasRows() || (dummyRow != null); } + @Override + public boolean isSingleRow() { + if (!hashMapResult.hasRows()) { + return (dummyRow != null); + } + return hashMapResult.isSingleRow(); + } + // Implementation of row container @Override - public RowIterator> rowIter() throws HiveException { - currentRow = -1; + public AbstractRowContainer.RowIterator> rowIter() throws HiveException { return this; } @Override public int rowCount() throws HiveException { - return dummyRow != null ? 1 : refs.size(); + // For performance reasons we do not want to chase the values to the end to determine + // the count. Use hasRows and isSingleRow instead. + throw new UnsupportedOperationException("Getting the row count not supported"); } @Override public void clearRows() { // Doesn't clear underlying hashtable - if (refs != null) { - refs.clear(); - } + hashMapResult.forget(); dummyRow = null; - currentRow = -1; aliasFilter = (byte) 0xff; } @@ -667,36 +681,47 @@ // Implementation of row iterator @Override public List first() throws HiveException { - currentRow = 0; - return next(); - } - - @Override - public List next() throws HiveException { + // A little strange that we forget the dummy row on read. if (dummyRow != null) { List result = dummyRow; dummyRow = null; return result; } - if (currentRow < 0 || refs.size() < currentRow) throw new HiveException("No rows"); - if (refs.size() == currentRow) return null; - WriteBuffers.ByteSegmentRef ref = refs.get(currentRow++); + + WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first(); + if (byteSegmentRef == null) { + return null; + } else { + return uppack(byteSegmentRef); + } + + } + + @Override + public List next() throws HiveException { + + WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.next(); + if (byteSegmentRef == null) { + return null; + } else { + return uppack(byteSegmentRef); + } + + } + + private List uppack(WriteBuffers.ByteSegmentRef ref) throws HiveException { if (ref.getLength() == 0) { return EMPTY_LIST; // shortcut, 0 length means no fields } - if (ref.getBytes() == null) { - // partitionId is derived from previously calculated value in setFromOutput() - hashPartitions[partitionId].hashMap.populateValue(ref); - } uselessIndirection.setData(ref.getBytes()); valueStruct.init(uselessIndirection, (int)ref.getOffset(), ref.getLength()); - return valueStruct.getFieldsAsList(); + return valueStruct.getFieldsAsList(); // TODO: should we unset bytes after that? 
} @Override public void addRow(List t) { - if (dummyRow != null || !refs.isEmpty()) { + if (dummyRow != null || hashMapResult.hasRows()) { throw new RuntimeException("Cannot add rows when not empty"); } dummyRow = t; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java (working copy) @@ -417,7 +417,7 @@ @Override public MapJoinRowContainer getCurrentRows() { - return currentValue.isEmpty() ? null : currentValue; + return !currentValue.hasRows() ? null : currentValue; } @Override @@ -430,8 +430,11 @@ private class ReusableRowContainer implements MapJoinRowContainer, AbstractRowContainer.RowIterator> { private byte aliasFilter; - private List refs; - private int currentRow; + + /** Hash table wrapper specific to the container. */ + private final BytesBytesMultiHashMap.ThreadSafeGetter threadSafeHashMapGetter; + private BytesBytesMultiHashMap.Result hashMapResult; + /** * Sometimes, when container is empty in multi-table mapjoin, we need to add a dummy row. * This container does not normally support adding rows; this is for the dummy row. @@ -449,48 +452,56 @@ valueStruct = null; // No rows? } uselessIndirection = new ByteArrayRef(); + threadSafeHashMapGetter = hashMap.createGetterForThread(); + hashMapResult = new BytesBytesMultiHashMap.Result(); clearRows(); } public JoinUtil.JoinResult setFromOutput(Output output) { - if (refs == null) { - refs = new ArrayList(); - } - byte aliasFilter = hashMap.getValueRefs(output.getData(), output.getLength(), refs); - this.aliasFilter = refs.isEmpty() ? (byte) 0xff : aliasFilter; - this.dummyRow = null; - if (refs.isEmpty()) { + + aliasFilter = threadSafeHashMapGetter.getValueResult( + output.getData(), 0, output.getLength(), hashMapResult); + dummyRow = null; + if (hashMapResult.hasRows()) { + return JoinUtil.JoinResult.MATCH; + } else { + aliasFilter = (byte) 0xff; return JoinUtil.JoinResult.NOMATCH; } - else { - return JoinUtil.JoinResult.MATCH; - } + + } + + @Override + public boolean hasRows() { + return hashMapResult.hasRows() || (dummyRow != null); } - public boolean isEmpty() { - return refs.isEmpty() && (dummyRow == null); + @Override + public boolean isSingleRow() { + if (!hashMapResult.hasRows()) { + return (dummyRow != null); + } + return hashMapResult.isSingleRow(); } // Implementation of row container @Override public AbstractRowContainer.RowIterator> rowIter() throws HiveException { - currentRow = -1; return this; } @Override public int rowCount() throws HiveException { - return dummyRow != null ? 1 : refs.size(); + // For performance reasons we do not want to chase the values to the end to determine + // the count. Use hasRows and isSingleRow instead. + throw new UnsupportedOperationException("Getting the row count not supported"); } @Override public void clearRows() { // Doesn't clear underlying hashtable - if (refs != null) { - refs.clear(); - } + hashMapResult.forget(); dummyRow = null; - currentRow = -1; aliasFilter = (byte) 0xff; } @@ -507,30 +518,39 @@ // Implementation of row iterator @Override public List first() throws HiveException { - currentRow = 0; - return nextInternal(); + + // A little strange that we forget the dummy row on read. 
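Both ReusableRowContainer rewrites above, like the debugDumpTable change earlier in this patch, read matched values through the new BytesBytesMultiHashMap.Result cursor instead of materializing a List of ByteSegmentRef. A small sketch of that read pattern follows, assuming the package locations from the Hive source tree; counting rows is done here only to exercise the cursor, since the patch deliberately steers callers toward hasRows()/isSingleRow() rather than counting.

    import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap;
    import org.apache.hadoop.hive.serde2.WriteBuffers;

    public class HashMapResultScan {

      // Walks every value stored under 'key'; zero iterations means NOMATCH.
      static int countValues(BytesBytesMultiHashMap map, byte[] key) {
        BytesBytesMultiHashMap.Result result = new BytesBytesMultiHashMap.Result();
        // Return value of getValueResult is the state byte (alias filter); ignored here.
        // This is the single-threaded variant; concurrent readers go through createGetterForThread().
        map.getValueResult(key, 0, key.length, result);
        int rows = 0;
        for (WriteBuffers.ByteSegmentRef ref = result.first(); ref != null; ref = result.next()) {
          // The cursor reuses one ByteSegmentRef, so copy its bytes before calling next()
          // if they must outlive the iteration; a zero-length ref means a row with no fields.
          rows++;
        }
        result.forget(); // drop the reference to the shared hash map
        return rows;
      }
    }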
+ if (dummyRow != null) { + List result = dummyRow; + dummyRow = null; + return result; + } + + WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first(); + if (byteSegmentRef == null) { + return null; + } else { + return uppack(byteSegmentRef); + } + } @Override public List next() throws HiveException { - return nextInternal(); + + WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.next(); + if (byteSegmentRef == null) { + return null; + } else { + return uppack(byteSegmentRef); + } + } - private List nextInternal() throws HiveException { - if (dummyRow != null) { - List result = dummyRow; - dummyRow = null; - return result; - } - if (currentRow < 0 || refs.size() < currentRow) throw new HiveException("No rows"); - if (refs.size() == currentRow) return null; - WriteBuffers.ByteSegmentRef ref = refs.get(currentRow++); + private List uppack(WriteBuffers.ByteSegmentRef ref) throws HiveException { if (ref.getLength() == 0) { return EMPTY_LIST; // shortcut, 0 length means no fields } - if (ref.getBytes() == null) { - hashMap.populateValue(ref); - } uselessIndirection.setData(ref.getBytes()); valueStruct.init(uselessIndirection, (int)ref.getOffset(), ref.getLength()); return valueStruct.getFieldsAsList(); // TODO: should we unset bytes after that? @@ -538,7 +558,7 @@ @Override public void addRow(List t) { - if (dummyRow != null || !refs.isEmpty()) { + if (dummyRow != null || hashMapResult.hasRows()) { throw new RuntimeException("Cannot add rows when not empty"); } dummyRow = t; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinEagerRowContainer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinEagerRowContainer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinEagerRowContainer.java (working copy) @@ -81,6 +81,16 @@ return null; } + @Override + public boolean hasRows() { + return list.size() > 0; + } + + @Override + public boolean isSingleRow() { + return list.size() == 1; + } + /** * Get the number of elements in the RowContainer. * Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java (working copy) @@ -331,6 +331,17 @@ } } + + @Override + public boolean hasRows() { + return size > 0; + } + + @Override + public boolean isSingleRow() { + return size == 1; + } + /** * Get the number of elements in the RowContainer. 
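The row container implementations above now expose the hasRows()/isSingleRow() predicates added to AbstractRowContainer. A tiny usage sketch, assuming the package locations from the Hive source tree; the class and method names in the sketch are illustrative only.

    import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer;
    import org.apache.hadoop.hive.ql.metadata.HiveException;

    public class RowContainerPredicates {

      // Prefer the new predicates over rowCount(): some containers in this patch make
      // rowCount() throw or require walking all values to compute.
      static <R> String describe(AbstractRowContainer<R> rows) throws HiveException {
        if (!rows.hasRows()) {
          return "empty";
        }
        return rows.isSingleRow() ? "single row" : "multiple rows";
      }
    }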
* Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java (working copy) @@ -95,7 +95,18 @@ internal.addRow(t); } + @Override + public boolean hasRows() throws HiveException { + return internal.hasRows(); + } + + @Override + public boolean isSingleRow() throws HiveException { + return internal.isSingleRow(); + } + + @Override public int rowCount() throws HiveException { return internal.rowCount(); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java (working copy) @@ -45,6 +45,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.Context; @@ -926,8 +927,9 @@ throws IOException { FileSystem destFS = dest.getFileSystem(conf); FileSystem sourceFS = src.getFileSystem(conf); - if (destFS.exists(dest)) { - return (sourceFS.getFileStatus(src).getLen() == destFS.getFileStatus(dest).getLen()); + FileStatus destStatus = FileUtils.getFileStatusOrNull(destFS, dest); + if (destStatus != null) { + return (sourceFS.getFileStatus(src).getLen() == destStatus.getLen()); } return false; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java (working copy) @@ -69,6 +69,7 @@ private final Configuration conf; private final JobConf jobConf; private final MRInputUserPayloadProto userPayloadProto; + private final MapWork work; private final SplitGrouper splitGrouper = new SplitGrouper(); @@ -86,7 +87,7 @@ // Read all credentials into the credentials instance stored in JobConf. ShimLoader.getHadoopShims().getMergedCredentials(jobConf); - MapWork work = Utilities.getMapWork(jobConf); + this.work = Utilities.getMapWork(jobConf); // Events can start coming in the moment the InputInitializer is created. The pruner // must be setup and initialized here so that it sets up it's structures to start accepting events. @@ -98,58 +99,64 @@ @Override public List initialize() throws Exception { - boolean sendSerializedEvents = - conf.getBoolean("mapreduce.tez.input.initializer.serialize.event.payload", true); + // Setup the map work for this thread. Pruning modified the work instance to potentially remove + // partitions. The same work instance must be used when generating splits. 
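On the DagUtils change above: replacing destFS.exists(dest) followed by destFS.getFileStatus(dest) with a single FileUtils.getFileStatusOrNull(destFS, dest) call reduces the pre-existing-resource check to one filesystem round trip. FileUtils.getFileStatusOrNull is a Hive helper; the sketch below re-implements its presumed behavior (mapping FileNotFoundException to null) purely for illustration, and the surrounding class is not part of the patch.

    import java.io.FileNotFoundException;
    import java.io.IOException;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class StatusOrNullSketch {

      // Single lookup: returns null instead of throwing when the path does not exist.
      static FileStatus getFileStatusOrNull(FileSystem fs, Path path) throws IOException {
        try {
          return fs.getFileStatus(path);
        } catch (FileNotFoundException e) {
          return null;
        }
      }

      // Mirrors the destination check above: a pre-existing file of equal length is
      // treated as already localized.
      static boolean sameLength(FileSystem srcFs, Path src, FileSystem destFs, Path dest)
          throws IOException {
        FileStatus destStatus = getFileStatusOrNull(destFs, dest);
        return destStatus != null && srcFs.getFileStatus(src).getLen() == destStatus.getLen();
      }
    }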
+ Utilities.setMapWork(jobConf, work); + try { + boolean sendSerializedEvents = + conf.getBoolean("mapreduce.tez.input.initializer.serialize.event.payload", true); - // perform dynamic partition pruning - pruner.prune(); + // perform dynamic partition pruning + pruner.prune(); - InputSplitInfoMem inputSplitInfo = null; - String realInputFormatName = conf.get("mapred.input.format.class"); - boolean groupingEnabled = userPayloadProto.getGroupingEnabled(); - if (groupingEnabled) { - // Need to instantiate the realInputFormat - InputFormat inputFormat = - (InputFormat) ReflectionUtils.newInstance(JavaUtils.loadClass(realInputFormatName), - jobConf); + InputSplitInfoMem inputSplitInfo = null; + String realInputFormatName = conf.get("mapred.input.format.class"); + boolean groupingEnabled = userPayloadProto.getGroupingEnabled(); + if (groupingEnabled) { + // Need to instantiate the realInputFormat + InputFormat inputFormat = + (InputFormat) ReflectionUtils + .newInstance(JavaUtils.loadClass(realInputFormatName), + jobConf); - int totalResource = getContext().getTotalAvailableResource().getMemory(); - int taskResource = getContext().getVertexTaskResource().getMemory(); - int availableSlots = totalResource / taskResource; + int totalResource = getContext().getTotalAvailableResource().getMemory(); + int taskResource = getContext().getVertexTaskResource().getMemory(); + int availableSlots = totalResource / taskResource; - // Create the un-grouped splits - float waves = - conf.getFloat(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES, - TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT); + // Create the un-grouped splits + float waves = + conf.getFloat(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES, + TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT); - InputSplit[] splits = inputFormat.getSplits(jobConf, (int) (availableSlots * waves)); - LOG.info("Number of input splits: " + splits.length + ". " + availableSlots - + " available slots, " + waves + " waves. Input format is: " + realInputFormatName); + InputSplit[] splits = inputFormat.getSplits(jobConf, (int) (availableSlots * waves)); + LOG.info("Number of input splits: " + splits.length + ". " + availableSlots + + " available slots, " + waves + " waves. Input format is: " + realInputFormatName); - Multimap groupedSplits = - splitGrouper.generateGroupedSplits(jobConf, conf, splits, waves, availableSlots); - // And finally return them in a flat array - InputSplit[] flatSplits = groupedSplits.values().toArray(new InputSplit[0]); - LOG.info("Number of grouped splits: " + flatSplits.length); + Multimap groupedSplits = + splitGrouper.generateGroupedSplits(jobConf, conf, splits, waves, availableSlots); + // And finally return them in a flat array + InputSplit[] flatSplits = groupedSplits.values().toArray(new InputSplit[0]); + LOG.info("Number of grouped splits: " + flatSplits.length); - List locationHints = splitGrouper.createTaskLocationHints(flatSplits); + List locationHints = splitGrouper.createTaskLocationHints(flatSplits); + inputSplitInfo = + new InputSplitInfoMem(flatSplits, locationHints, flatSplits.length, null, jobConf); + } else { + // no need for grouping and the target #of tasks. + // This code path should never be triggered at the moment. If grouping is disabled, + // DAGUtils uses MRInputAMSplitGenerator. 
+ // If this is used in the future - make sure to disable grouping in the payload, if it isn't already disabled + throw new RuntimeException( + "HiveInputFormat does not support non-grouped splits, InputFormatName is: " + + realInputFormatName); + // inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(jobConf, false, 0); + } + + return createEventList(sendSerializedEvents, inputSplitInfo); + } finally { Utilities.clearWork(jobConf); - - inputSplitInfo = - new InputSplitInfoMem(flatSplits, locationHints, flatSplits.length, null, jobConf); - } else { - // no need for grouping and the target #of tasks. - // This code path should never be triggered at the moment. If grouping is disabled, - // DAGUtils uses MRInputAMSplitGenerator. - // If this is used in the future - make sure to disable grouping in the payload, if it isn't already disabled - throw new RuntimeException( - "HiveInputFormat does not support non-grouped splits, InputFormatName is: " - + realInputFormatName); - // inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(jobConf, false, 0); } - - return createEventList(sendSerializedEvents, inputSplitInfo); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java (working copy) @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.exec.tez.tools.KeyValueInputMerger; import org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator; import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.serde2.Deserializer; @@ -82,14 +83,15 @@ private boolean abort = false; protected static final String MAP_PLAN_KEY = "__MAP_PLAN__"; private MapWork mapWork; - List mergeWorkList = null; + List mergeWorkList = null; List cacheKeys; ObjectCache cache; private static Map connectOps = new TreeMap(); - public MapRecordProcessor(final JobConf jconf) throws Exception { + public MapRecordProcessor(final JobConf jconf, final ProcessorContext context) throws Exception { + super(jconf, context); ObjectCache cache = ObjectCacheFactory.getCache(jconf); execContext = new ExecMapperContext(jconf); execContext.setJc(jconf); @@ -108,35 +110,14 @@ }); Utilities.setMapWork(jconf, mapWork); - String prefixes = jconf.get(DagUtils.TEZ_MERGE_WORK_FILE_PREFIXES); - if (prefixes != null) { - mergeWorkList = new ArrayList(); - - for (final String prefix : prefixes.split(",")) { - if (prefix == null || prefix.isEmpty()) { - continue; - } - - key = queryId + prefix; - cacheKeys.add(key); - - mergeWorkList.add( - (MapWork) cache.retrieve(key, - new Callable() { - @Override - public Object call() { - return Utilities.getMergeWork(jconf, prefix); - } - })); - } - } + mergeWorkList = getMergeWorkList(jconf, key, queryId, cache, cacheKeys); } @Override - void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrReporter, + void init(MRTaskReporter mrReporter, Map inputs, Map outputs) throws Exception { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); - super.init(jconf, processorContext, mrReporter, inputs, outputs); + super.init(mrReporter, inputs, outputs); MapredContext.init(true, new JobConf(jconf)); ((TezContext) MapredContext.get()).setInputs(inputs); @@ -174,7 
+155,8 @@ connectOps.clear(); if (mergeWorkList != null) { MapOperator mergeMapOp = null; - for (MapWork mergeMapWork : mergeWorkList) { + for (BaseWork mergeWork : mergeWorkList) { + MapWork mergeMapWork = (MapWork) mergeWork; if (mergeMapWork.getVectorMode()) { mergeMapOp = new VectorMapOperator(); } else { @@ -199,6 +181,8 @@ } } + ((TezContext) (MapredContext.get())).setDummyOpsMap(connectOps); + // initialize map operator mapOp.setConf(mapWork); l4j.info("Main input name is " + mapWork.getName()); @@ -356,10 +340,6 @@ } } - public static Map getConnectOps() { - return connectOps; - } - private MRInputLegacy getMRInput(Map inputs) throws Exception { // there should be only one MRInput MRInputLegacy theMRInput = null; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java (working copy) @@ -64,12 +64,16 @@ private final Object[] row = new Object[2]; ObjectCache cache; + public MergeFileRecordProcessor(final JobConf jconf, final ProcessorContext context) { + super(jconf, context); + } + @Override - void init(final JobConf jconf, ProcessorContext processorContext, + void init( MRTaskReporter mrReporter, Map inputs, Map outputs) throws Exception { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); - super.init(jconf, processorContext, mrReporter, inputs, outputs); + super.init(mrReporter, inputs, outputs); execContext = new ExecMapperContext(jconf); //Update JobConf using MRInput, info like filename comes via this Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileTezProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileTezProcessor.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileTezProcessor.java (working copy) @@ -38,7 +38,7 @@ @Override public void run(Map inputs, Map outputs) throws Exception { - rproc = new MergeFileRecordProcessor(); + rproc = new MergeFileRecordProcessor(jobConf, getContext()); initializeAndRunProcessor(inputs, outputs); } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java (working copy) @@ -20,8 +20,13 @@ import com.google.common.collect.Maps; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.ObjectCache; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector; import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; import org.apache.tez.mapreduce.processor.MRTaskReporter; @@ -32,9 +37,12 @@ import java.lang.management.ManagementFactory; import java.lang.management.MemoryMXBean; import java.net.URLClassLoader; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Map; import 
java.util.Map.Entry; +import java.util.concurrent.Callable; /** * Process input from tez LogicalInput and write output @@ -42,11 +50,11 @@ */ public abstract class RecordProcessor { - protected JobConf jconf; + protected final JobConf jconf; protected Map inputs; protected Map outputs; protected Map outMap; - protected ProcessorContext processorContext; + protected final ProcessorContext processorContext; public static final Log l4j = LogFactory.getLog(RecordProcessor.class); @@ -59,22 +67,23 @@ protected PerfLogger perfLogger = PerfLogger.getPerfLogger(); protected String CLASS_NAME = RecordProcessor.class.getName(); + public RecordProcessor(JobConf jConf, ProcessorContext processorContext) { + this.jconf = jConf; + this.processorContext = processorContext; + } + /** * Common initialization code for RecordProcessors - * @param jconf - * @param processorContext the {@link ProcessorContext} * @param mrReporter * @param inputs map of Input names to {@link LogicalInput}s * @param outputs map of Output names to {@link LogicalOutput}s * @throws Exception */ - void init(JobConf jconf, ProcessorContext processorContext, MRTaskReporter mrReporter, + void init(MRTaskReporter mrReporter, Map inputs, Map outputs) throws Exception { - this.jconf = jconf; this.reporter = mrReporter; this.inputs = inputs; this.outputs = outputs; - this.processorContext = processorContext; isLogInfoEnabled = l4j.isInfoEnabled(); isLogTraceEnabled = l4j.isTraceEnabled(); @@ -110,4 +119,32 @@ outMap.put(entry.getKey(), collector); } } + + public List getMergeWorkList(final JobConf jconf, String key, String queryId, + ObjectCache cache, List cacheKeys) throws HiveException { + String prefixes = jconf.get(DagUtils.TEZ_MERGE_WORK_FILE_PREFIXES); + if (prefixes != null) { + List mergeWorkList = new ArrayList(); + + for (final String prefix : prefixes.split(",")) { + if (prefix == null || prefix.isEmpty()) { + continue; + } + + key = queryId + prefix; + cacheKeys.add(key); + + mergeWorkList.add((BaseWork) cache.retrieve(key, new Callable() { + @Override + public Object call() { + return Utilities.getMergeWork(jconf, prefix); + } + })); + } + + return mergeWorkList; + } else { + return null; + } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java (working copy) @@ -18,16 +18,18 @@ package org.apache.hadoop.hive.ql.exec.tez; import java.util.ArrayList; -import java.util.Collections; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.TreeMap; import java.util.concurrent.Callable; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.ObjectCache; @@ -38,7 +40,7 @@ import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats; import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector; import org.apache.hadoop.hive.ql.log.PerfLogger; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.BaseWork; import 
org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -64,84 +66,125 @@ public static final Log l4j = LogFactory.getLog(ReduceRecordProcessor.class); - private ReduceWork redWork; + private ReduceWork reduceWork; + List mergeWorkList = null; + List cacheKeys; + + private final Map connectOps = + new TreeMap(); + private final Map tagToReducerMap = new HashMap(); + private Operator reducer; private ReduceRecordSource[] sources; - private final byte position = 0; + private byte bigTablePosition = 0; private boolean abort; - @Override - void init(final JobConf jconf, ProcessorContext processorContext, - MRTaskReporter mrReporter, Map inputs, - Map outputs) throws Exception { - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); - super.init(jconf, processorContext, mrReporter, inputs, outputs); + public ReduceRecordProcessor(final JobConf jconf, final ProcessorContext context) throws Exception { + super(jconf, context); ObjectCache cache = ObjectCacheFactory.getCache(jconf); String queryId = HiveConf.getVar(jconf, HiveConf.ConfVars.HIVEQUERYID); cacheKey = queryId + REDUCE_PLAN_KEY; - redWork = (ReduceWork) cache.retrieve(cacheKey, new Callable() { + cacheKeys = new ArrayList(); + cacheKeys.add(cacheKey); + reduceWork = (ReduceWork) cache.retrieve(cacheKey, new Callable() { @Override public Object call() { return Utilities.getReduceWork(jconf); - } - }); - Utilities.setReduceWork(jconf, redWork); + } + }); - reducer = redWork.getReducer(); - reducer.getParentOperators().clear(); - reducer.setParentOperators(null); // clear out any parents as reducer is the root + Utilities.setReduceWork(jconf, reduceWork); + mergeWorkList = getMergeWorkList(jconf, cacheKey, queryId, cache, cacheKeys); + } - int numTags = redWork.getTagToValueDesc().size(); + @Override + void init( + MRTaskReporter mrReporter, Map inputs, + Map outputs) throws Exception { + perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); + super.init(mrReporter, inputs, outputs); - ObjectInspector[] ois = new ObjectInspector[numTags]; - sources = new ReduceRecordSource[numTags]; + MapredContext.init(false, new JobConf(jconf)); + List shuffleInputs = getShuffleInputs(inputs); + if (shuffleInputs != null) { + l4j.info("Waiting for ShuffleInputs to become ready"); + processorContext.waitForAllInputsReady(new ArrayList(shuffleInputs)); + } - for (int tag = 0; tag < redWork.getTagToValueDesc().size(); tag++) { - TableDesc keyTableDesc = redWork.getKeyDesc(); - TableDesc valueTableDesc = redWork.getTagToValueDesc().get(tag); + connectOps.clear(); + ReduceWork redWork = reduceWork; + tagToReducerMap.put(redWork.getTag(), redWork); + if (mergeWorkList != null) { + for (BaseWork mergeWork : mergeWorkList) { + ReduceWork mergeReduceWork = (ReduceWork) mergeWork; + reducer = mergeReduceWork.getReducer(); + DummyStoreOperator dummyStoreOp = getJoinParentOp(reducer); + connectOps.put(mergeReduceWork.getTag(), dummyStoreOp); + tagToReducerMap.put(mergeReduceWork.getTag(), mergeReduceWork); + } - // make the reader ready for prime time - Input input = inputs.get(redWork.getTagToInput().get(tag)); - input.start(); - processorContext.waitForAnyInputReady(Collections.singleton(input)); - KeyValuesReader reader = (KeyValuesReader) input.getReader(); - - // now we can setup the record source - sources[tag] = new ReduceRecordSource(); - sources[tag].init(jconf, reducer, redWork.getVectorMode(), keyTableDesc, 
valueTableDesc, - reader, tag == position, (byte) tag, - redWork.getAllScratchColumnVectorTypeMaps()); - ois[tag] = sources[tag].getObjectInspector(); + bigTablePosition = (byte) reduceWork.getTag(); + ((TezContext) MapredContext.get()).setDummyOpsMap(connectOps); } - MapredContext.init(false, new JobConf(jconf)); + ObjectInspector[] mainWorkOIs = null; ((TezContext) MapredContext.get()).setInputs(inputs); ((TezContext) MapredContext.get()).setTezProcessorContext(processorContext); - ((TezContext) MapredContext.get()).setRecordSources(sources); + int numTags = reduceWork.getTagToValueDesc().size(); + reducer = reduceWork.getReducer(); + if (numTags > 1) { + sources = new ReduceRecordSource[numTags]; + mainWorkOIs = new ObjectInspector[numTags]; + initializeMultipleSources(reduceWork, numTags, mainWorkOIs, sources); + ((TezContext) MapredContext.get()).setRecordSources(sources); + reducer.initialize(jconf, mainWorkOIs); + } else { + numTags = tagToReducerMap.keySet().size(); + sources = new ReduceRecordSource[numTags]; + mainWorkOIs = new ObjectInspector[numTags]; + for (int i : tagToReducerMap.keySet()) { + redWork = tagToReducerMap.get(i); + reducer = redWork.getReducer(); + initializeSourceForTag(redWork, i, mainWorkOIs, sources, + redWork.getTagToValueDesc().get(0), redWork.getTagToInput().get(0)); + reducer.initializeLocalWork(jconf); + } + reducer = reduceWork.getReducer(); + ((TezContext) MapredContext.get()).setRecordSources(sources); + reducer.initialize(jconf, new ObjectInspector[] { mainWorkOIs[bigTablePosition] }); + for (int i : tagToReducerMap.keySet()) { + if (i == bigTablePosition) { + continue; + } + redWork = tagToReducerMap.get(i); + reducer = redWork.getReducer(); + reducer.initialize(jconf, new ObjectInspector[] { mainWorkOIs[i] }); + } + } + reducer = reduceWork.getReducer(); // initialize reduce operator tree try { l4j.info(reducer.dump(0)); - reducer.initialize(jconf, ois); // Initialization isn't finished until all parents of all operators // are initialized. For broadcast joins that means initializing the // dummy parent operators as well. List dummyOps = redWork.getDummyOps(); if (dummyOps != null) { - for (Operator dummyOp : dummyOps){ + for (HashTableDummyOperator dummyOp : dummyOps) { dummyOp.initialize(jconf, null); } } // set output collector for any reduce sink operators in the pipeline. 
- List> children = new LinkedList>(); + List> children = new LinkedList>(); children.add(reducer); if (dummyOps != null) { children.addAll(dummyOps); @@ -165,13 +208,36 @@ perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); } + private void initializeMultipleSources(ReduceWork redWork, int numTags, ObjectInspector[] ois, + ReduceRecordSource[] sources) throws Exception { + for (int tag = 0; tag < redWork.getTagToValueDesc().size(); tag++) { + if (redWork.getTagToValueDesc().get(tag) == null) { + continue; + } + initializeSourceForTag(redWork, tag, ois, sources, redWork.getTagToValueDesc().get(tag), + redWork.getTagToInput().get(tag)); + } + } + + private void initializeSourceForTag(ReduceWork redWork, int tag, ObjectInspector[] ois, + ReduceRecordSource[] sources, TableDesc valueTableDesc, String inputName) + throws Exception { + reducer = redWork.getReducer(); + reducer.getParentOperators().clear(); + reducer.setParentOperators(null); // clear out any parents as reducer is the root + + TableDesc keyTableDesc = redWork.getKeyDesc(); + KeyValuesReader reader = (KeyValuesReader) inputs.get(inputName).getReader(); + + sources[tag] = new ReduceRecordSource(); + sources[tag].init(jconf, redWork.getReducer(), redWork.getVectorMode(), keyTableDesc, + valueTableDesc, reader, tag == bigTablePosition, (byte) tag, + redWork.getVectorScratchColumnTypeMap()); + ois[tag] = sources[tag].getObjectInspector(); + } + @Override void run() throws Exception { - List shuffleInputs = getShuffleInputs(inputs); - if (shuffleInputs != null) { - l4j.info("Waiting for ShuffleInputs to become ready"); - processorContext.waitForAllInputsReady(new ArrayList(shuffleInputs)); - } for (Entry outputEntry : outputs.entrySet()) { l4j.info("Starting Output: " + outputEntry.getKey()); @@ -180,22 +246,26 @@ } // run the operator pipeline - while (sources[position].pushRecord()) {} + while (sources[bigTablePosition].pushRecord()) { + } } /** * Get the inputs that should be streamed through reduce plan. + * * @param inputs * @return + * @throws Exception */ - private List getShuffleInputs(Map inputs) { - //the reduce plan inputs have tags, add all inputs that have tags - Map tagToinput = redWork.getTagToInput(); + private List getShuffleInputs(Map inputs) throws Exception { + // the reduce plan inputs have tags, add all inputs that have tags + Map tagToinput = reduceWork.getTagToInput(); ArrayList shuffleInputs = new ArrayList(); - for(String inpStr : tagToinput.values()){ + for (String inpStr : tagToinput.values()) { if (inputs.get(inpStr) == null) { throw new AssertionError("Cound not find input: " + inpStr); } + inputs.get(inpStr).start(); shuffleInputs.add(inputs.get(inpStr)); } return shuffleInputs; @@ -203,8 +273,10 @@ @Override void close(){ - if (cache != null) { - cache.release(cacheKey); + if (cache != null && cacheKeys != null) { + for (String key : cacheKeys) { + cache.release(key); + } } try { @@ -213,13 +285,18 @@ } reducer.close(abort); + if (mergeWorkList != null) { + for (BaseWork redWork : mergeWorkList) { + ((ReduceWork) redWork).getReducer().close(abort); + } + } // Need to close the dummyOps as well. The operator pipeline // is not considered "closed/done" unless all operators are // done. For broadcast joins that includes the dummy parents. 
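The record processors in this patch collect every plan-cache key they use (the queryId-based plan key plus one key per merge-work prefix from getMergeWorkList) and release them all in close(), as the loop over cacheKeys above shows for the reduce side. A minimal, JDK-only sketch of that retrieve-with-loader / release-all shape, using a hypothetical PlanCache rather than Hive's ObjectCache API:

import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

// Hypothetical per-query cache keyed by strings; the loader is only invoked on a miss.
final class PlanCache {
  private final ConcurrentMap<String, Object> cache = new ConcurrentHashMap<String, Object>();

  // Return the cached value for key, computing it with the loader on a miss
  // (assumes the loader never returns null).
  Object retrieve(String key, Callable<?> loader) throws Exception {
    Object value = cache.get(key);
    if (value == null) {
      value = loader.call();
      Object raced = cache.putIfAbsent(key, value);
      if (raced != null) {
        value = raced;   // another thread loaded it first; keep that copy
      }
    }
    return value;
  }

  // Release every key a processor registered, mirroring the release loop in close() above.
  void releaseAll(List<String> keys) {
    for (String key : keys) {
      cache.remove(key);
    }
  }
}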
- List dummyOps = redWork.getDummyOps(); + List dummyOps = reduceWork.getDummyOps(); if (dummyOps != null) { - for (Operator dummyOp : dummyOps){ + for (Operator dummyOp : dummyOps) { dummyOp.close(abort); } } @@ -230,8 +307,8 @@ if (!abort) { // signal new failure to map-reduce l4j.error("Hit error while closing operators - failing tree"); - throw new RuntimeException("Hive Runtime Error while closing operators: " - + e.getMessage(), e); + throw new RuntimeException( + "Hive Runtime Error while closing operators: " + e.getMessage(), e); } } finally { Utilities.clearWorkMap(); @@ -239,4 +316,19 @@ } } + private DummyStoreOperator getJoinParentOp(Operator mergeReduceOp) { + for (Operator childOp : mergeReduceOp.getChildOperators()) { + if ((childOp.getChildOperators() == null) || (childOp.getChildOperators().isEmpty())) { + if (childOp instanceof DummyStoreOperator) { + return (DummyStoreOperator) childOp; + } else { + throw new IllegalStateException("Was expecting dummy store operator but found: " + + childOp); + } + } else { + return getJoinParentOp(childOp); + } + } + throw new IllegalStateException("Expecting a DummyStoreOperator found op: " + mergeReduceOp); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java (working copy) @@ -68,7 +68,7 @@ private boolean abort = false; - private static Deserializer inputKeyDeserializer; + private Deserializer inputKeyDeserializer; // Input value serde needs to be an array to support different SerDe // for different tags @@ -114,7 +114,7 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, KeyValuesReader reader, boolean handleGroupKey, byte tag, - Map> scratchColumnVectorTypes) + Map vectorScratchColumnTypeMap) throws Exception { ObjectInspector keyObjectInspector; @@ -180,10 +180,8 @@ } rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois); - Map reduceShuffleScratchColumnTypeMap = - scratchColumnVectorTypes.get("_REDUCE_SHUFFLE_"); batchContext = new VectorizedRowBatchCtx(); - batchContext.init(reduceShuffleScratchColumnTypeMap, (StructObjectInspector) rowObjectInspector); + batchContext.init(vectorScratchColumnTypeMap, (StructObjectInspector) rowObjectInspector); batch = batchContext.createVectorizedRowBatch(); } else { ois.add(keyObjectInspector); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezContext.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezContext.java (working copy) @@ -19,6 +19,7 @@ import java.util.Map; +import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.mapred.JobConf; import org.apache.tez.runtime.api.LogicalInput; @@ -39,6 +40,8 @@ private RecordSource[] sources; + private Map dummyOpsMap; + public TezContext(boolean isMap, JobConf jobConf) { super(isMap, jobConf); } @@ -80,4 +83,12 @@ public void setRecordSources(RecordSource[] sources) { this.sources = sources; } + + public void setDummyOpsMap(Map dummyOpsMap) { + this.dummyOpsMap = dummyOpsMap; + } + + public Map getDummyOpsMap() { + return dummyOpsMap; + } } 
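The TezContext change just above is the hand-off point for the merge-join wiring: the record processors walk each merged work's operator tree down to its DummyStoreOperator, register it under that work's tag, and publish the map on the context, replacing the static MapRecordProcessor.getConnectOps() that this patch removes. A compact, JDK-only sketch of that wiring; Op, DummyStoreOp and JoinTaskContext are illustrative stand-ins, not Hive's classes:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

class Op {
  final List<Op> children = new ArrayList<Op>();
  Op child(Op c) { children.add(c); return this; }
}

class DummyStoreOp extends Op {
  final String name;
  DummyStoreOp(String name) { this.name = name; }
}

final class JoinTaskContext {
  private Map<Integer, DummyStoreOp> dummyOpsMap;
  void setDummyOpsMap(Map<Integer, DummyStoreOp> m) { dummyOpsMap = m; }
  Map<Integer, DummyStoreOp> getDummyOpsMap() { return dummyOpsMap; }
}

class MergeJoinWiringSketch {
  // Follow the child chain down to a leaf, which must be a dummy store
  // (same shape as getJoinParentOp in the hunk above; only the first chain is followed).
  static DummyStoreOp findJoinParent(Op op) {
    for (Op child : op.children) {
      if (child.children.isEmpty()) {
        if (child instanceof DummyStoreOp) {
          return (DummyStoreOp) child;
        }
        throw new IllegalStateException("expected a dummy store leaf, found: " + child);
      }
      return findJoinParent(child);
    }
    throw new IllegalStateException("no dummy store under: " + op);
  }

  public static void main(String[] args) {
    // Two merged (small-table) reducer trees, registered under their tags.
    Op mergeReducer1 = new Op().child(new DummyStoreOp("dummy store, tag 1"));
    Op mergeReducer2 = new Op().child(new Op().child(new DummyStoreOp("dummy store, tag 2")));

    Map<Integer, DummyStoreOp> connectOps = new TreeMap<Integer, DummyStoreOp>();
    connectOps.put(1, findJoinParent(mergeReducer1));
    connectOps.put(2, findJoinParent(mergeReducer2));

    JoinTaskContext ctx = new JoinTaskContext();
    ctx.setDummyOpsMap(connectOps);

    // A downstream join operator looks up its dummy parent by tag instead of via a static field.
    System.out.println(ctx.getDummyOpsMap().get(2).name);
  }
}

Keying the map by tag is what lets the single-source reduce path above treat the main work's tag as the big-table position while the merged small-table works are still reachable through their own dummy parents.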
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java (working copy) @@ -98,7 +98,7 @@ // in-place progress update related variables private int lines; - private PrintStream out; + private final PrintStream out; private String separator; private transient LogHelper console; @@ -115,6 +115,8 @@ private final NumberFormat commaFormat; private static final List shutdownList; + private StringBuffer diagnostics; + static { shutdownList = Collections.synchronizedList(new LinkedList()); Runtime.getRuntime().addShutdownHook(new Thread() { @@ -251,6 +253,7 @@ DAG dag) throws InterruptedException { DAGStatus status = null; completed = new HashSet(); + diagnostics = new StringBuffer(); boolean running = false; boolean done = false; @@ -396,6 +399,7 @@ if (rc != 0 && status != null) { for (String diag : status.getDiagnostics()) { console.printError(diag); + diagnostics.append(diag); } } shutdownList.remove(dagClient); @@ -800,11 +804,11 @@ perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s); } if(complete < total && (complete > 0 || running > 0 || failed > 0)) { - + if (!perfLogger.startTimeHasMethod(PerfLogger.TEZ_RUN_VERTEX + s)) { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s); } - + /* vertex is started, but not complete */ if (failed > 0) { reportBuffer.append(String.format("%s: %d(+%d,-%d)/%d\t", s, complete, running, failed, total)); @@ -825,4 +829,8 @@ return reportBuffer.toString(); } + + public String getDiagnostics() { + return diagnostics.toString(); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java (working copy) @@ -129,9 +129,9 @@ LOG.info("Running task: " + getContext().getUniqueIdentifier()); if (isMap) { - rproc = new MapRecordProcessor(jobConf); + rproc = new MapRecordProcessor(jobConf, getContext()); } else { - rproc = new ReduceRecordProcessor(); + rproc = new ReduceRecordProcessor(jobConf, getContext()); } initializeAndRunProcessor(inputs, outputs); @@ -144,7 +144,7 @@ try { MRTaskReporter mrReporter = new MRTaskReporter(getContext()); - rproc.init(jobConf, getContext(), mrReporter, inputs, outputs); + rproc.init(mrReporter, inputs, outputs); rproc.run(); //done - output does not need to be committed as hive does not use outputcommitter Index: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java (working copy) @@ -27,15 +27,16 @@ import java.util.Map; import java.util.Set; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.DriverContext; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.log.PerfLogger; +import 
org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MergeJoinWork; @@ -48,22 +49,22 @@ import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.tez.common.counters.CounterGroup; import org.apache.tez.common.counters.TezCounter; import org.apache.tez.common.counters.TezCounters; +import org.apache.tez.common.security.DAGAccessControls; import org.apache.tez.dag.api.DAG; import org.apache.tez.dag.api.Edge; import org.apache.tez.dag.api.GroupInputEdge; import org.apache.tez.dag.api.SessionNotRunning; -import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.api.Vertex; import org.apache.tez.dag.api.VertexGroup; import org.apache.tez.dag.api.client.DAGClient; import org.apache.tez.dag.api.client.StatusGetOpts; +import org.json.JSONObject; /** * @@ -165,6 +166,9 @@ // finally monitor will print progress until the job is done TezJobMonitor monitor = new TezJobMonitor(); rc = monitor.monitorExecution(client, ctx.getHiveTxnManager(), conf, dag); + if (rc != 0) { + this.setException(new HiveException(monitor.getDiagnostics())); + } // fetch the counters Set statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS); @@ -276,7 +280,18 @@ // the name of the dag is what is displayed in the AM/Job UI DAG dag = DAG.create(work.getName()); + + // set some info for the query + JSONObject json = new JSONObject().put("context", "Hive").put("description", ctx.getCmd()); + String dagInfo = json.toString(); + + if (LOG.isDebugEnabled()) { + LOG.debug("DagInfo: " + dagInfo); + } + dag.setDAGInfo(dagInfo); + dag.setCredentials(conf.getCredentials()); + setAccessControlsForCurrentUser(dag); for (BaseWork w: ws) { @@ -351,6 +366,17 @@ return dag; } + private void setAccessControlsForCurrentUser(DAG dag) { + // get current user + String currentUser = SessionState.getUserFromAuthenticator(); + if(LOG.isDebugEnabled()) { + LOG.debug("Setting Tez DAG access for " + currentUser); + } + // set permissions for current user on DAG + DAGAccessControls ac = new DAGAccessControls(currentUser, currentUser); + dag.setAccessControls(ac); + } + DAGClient submit(JobConf conf, DAG dag, Path scratchDir, LocalResource appJarLr, TezSessionState sessionState, List additionalLr, String[] inputOutputJars, Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java (working copy) @@ -0,0 +1,596 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.sql.Timestamp; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateUtils; + +/** + * This class assigns specified columns of a row from a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * assign from the Writable row Object[]. + * + * This class is abstract to allow the subclasses to control batch reuse. + */ +public abstract class VectorAssignRow { + private static final long serialVersionUID = 1L; + private static final Log LOG = LogFactory.getLog(VectorAssignRow.class); + + protected abstract class Assigner { + protected int columnIndex; + + Assigner(int columnIndex) { + this.columnIndex = columnIndex; + } + + public int getColumnIndex() { + return columnIndex; + } + + abstract void setColumnVector(VectorizedRowBatch batch); + + abstract void forgetColumnVector(); + + abstract void assign(int batchIndex, Object object); + } + + private class VoidAssigner extends Assigner { + + VoidAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + } + + @Override + void forgetColumnVector() { + } + + @Override + void assign(int batchIndex, Object object) { + // This is no-op, there is no column to assign to and the object is expected to be null. 
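VectorAssignRow, whose Assigner hierarchy begins above, resolves column types exactly once: the createAssigner factory further below switches on the primitive category at init time and hands back a small per-column Assigner, so the per-row path is an array index plus a virtual call, with the null check inside each assigner. A JDK-only sketch of that precompiled-strategy shape, with hypothetical names (RowAssigner, ColumnAssigner); the real assigners write into ColumnVector subclasses at a batch index, not into a plain Object[]:

import java.util.List;

// Hypothetical sketch of the "one assigner per column, built up front" pattern.
final class RowAssigner {

  interface ColumnAssigner {
    void assign(Object[] row, int column, Object value);
  }

  private final ColumnAssigner[] assigners;

  RowAssigner(List<String> typeNames) {
    assigners = new ColumnAssigner[typeNames.size()];
    for (int i = 0; i < assigners.length; i++) {
      assigners[i] = createAssigner(typeNames.get(i));   // one type switch per column, at init time
    }
  }

  private static ColumnAssigner createAssigner(String typeName) {
    if ("int".equals(typeName) || "bigint".equals(typeName)) {
      return new ColumnAssigner() {
        public void assign(Object[] row, int column, Object value) {
          row[column] = (value == null) ? null : Long.valueOf(((Number) value).longValue());
        }
      };
    }
    if ("string".equals(typeName)) {
      return new ColumnAssigner() {
        public void assign(Object[] row, int column, Object value) {
          row[column] = (value == null) ? null : value.toString();
        }
      };
    }
    throw new IllegalArgumentException("no assigner for type " + typeName);
  }

  // Per-row path: no type dispatch left, just index into the prebuilt array.
  void assignRow(Object[] target, Object[] values) {
    for (int i = 0; i < assigners.length; i++) {
      assigners[i].assign(target, i, values[i]);
    }
  }
}

The same once-per-batch idea is behind the setColumnVector/forgetColumnVector hooks and the DynBatch/SameBatch subclasses that follow: binding assigners to a batch is done per batch, never per row.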
+ assert (object == null); + } + } + + private abstract class AbstractLongAssigner extends Assigner { + + protected LongColumnVector colVector; + protected long[] vector; + + AbstractLongAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (LongColumnVector) batch.cols[columnIndex]; + vector = colVector.vector; + } + + @Override + void forgetColumnVector() { + colVector = null; + vector = null; + } + } + + protected class BooleanAssigner extends AbstractLongAssigner { + + BooleanAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + BooleanWritable bw = (BooleanWritable) object; + vector[batchIndex] = (bw.get() ? 1 : 0); + } + } + } + + protected class ByteAssigner extends AbstractLongAssigner { + + ByteAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + ByteWritable bw = (ByteWritable) object; + vector[batchIndex] = bw.get(); + } + } + } + + private class ShortAssigner extends AbstractLongAssigner { + + ShortAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + ShortWritable sw = (ShortWritable) object; + vector[batchIndex] = sw.get(); + } + } + } + + private class IntAssigner extends AbstractLongAssigner { + + IntAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + IntWritable iw = (IntWritable) object; + vector[batchIndex] = iw.get(); + } + } + } + + private class LongAssigner extends AbstractLongAssigner { + + LongAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + LongWritable lw = (LongWritable) object; + vector[batchIndex] = lw.get(); + } + } + } + + private class DateAssigner extends AbstractLongAssigner { + + DateAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + DateWritable bw = (DateWritable) object; + vector[batchIndex] = bw.getDays(); + } + } + } + + private class TimestampAssigner extends AbstractLongAssigner { + + TimestampAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + TimestampWritable tw = (TimestampWritable) object; + Timestamp t = tw.getTimestamp(); + vector[batchIndex] = TimestampUtils.getTimeNanoSec(t); + } + } + } + + private class IntervalYearMonthAssigner extends AbstractLongAssigner { + + IntervalYearMonthAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + 
HiveIntervalYearMonthWritable iymw = (HiveIntervalYearMonthWritable) object; + HiveIntervalYearMonth iym = iymw.getHiveIntervalYearMonth(); + vector[batchIndex] = iym.getTotalMonths(); + } + } + } + + private class IntervalDayTimeAssigner extends AbstractLongAssigner { + + IntervalDayTimeAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + HiveIntervalDayTimeWritable idtw = (HiveIntervalDayTimeWritable) object; + HiveIntervalDayTime idt = idtw.getHiveIntervalDayTime(); + vector[batchIndex] = DateUtils.getIntervalDayTimeTotalNanos(idt); + } + } + } + + private abstract class AbstractDoubleAssigner extends Assigner { + + protected DoubleColumnVector colVector; + protected double[] vector; + + AbstractDoubleAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (DoubleColumnVector) batch.cols[columnIndex]; + vector = colVector.vector; + } + + @Override + void forgetColumnVector() { + colVector = null; + vector = null; + } + } + + private class FloatAssigner extends AbstractDoubleAssigner { + + FloatAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + FloatWritable fw = (FloatWritable) object; + vector[batchIndex] = fw.get(); + } + } + } + + private class DoubleAssigner extends AbstractDoubleAssigner { + + DoubleAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + DoubleWritable dw = (DoubleWritable) object; + vector[batchIndex] = dw.get(); + } + } + } + + private abstract class AbstractBytesAssigner extends Assigner { + + protected BytesColumnVector colVector; + + AbstractBytesAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (BytesColumnVector) batch.cols[columnIndex]; + } + + @Override + void forgetColumnVector() { + colVector = null; + } + } + + private class BinaryAssigner extends AbstractBytesAssigner { + + BinaryAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + BytesWritable bw = (BytesWritable) object; + colVector.setVal(batchIndex, bw.getBytes(), 0, bw.getLength()); + } + } + } + + private class StringAssigner extends AbstractBytesAssigner { + + StringAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + Text tw = (Text) object; + colVector.setVal(batchIndex, tw.getBytes(), 0, tw.getLength()); + } + } + } + + private class VarCharAssigner extends AbstractBytesAssigner { + + VarCharAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // We store VARCHAR type stripped of pads. 
+ HiveVarchar hiveVarchar; + if (object instanceof HiveVarchar) { + hiveVarchar = (HiveVarchar) object; + } else { + hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar(); + } + byte[] bytes = hiveVarchar.getValue().getBytes(); + colVector.setVal(batchIndex, bytes, 0, bytes.length); + } + } + } + + private class CharAssigner extends AbstractBytesAssigner { + + CharAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // We store CHAR type stripped of pads. + HiveChar hiveChar; + if (object instanceof HiveChar) { + hiveChar = (HiveChar) object; + } else { + hiveChar = ((HiveCharWritable) object).getHiveChar(); + } + + // We store CHAR in vector row batch with padding stripped. + byte[] bytes = hiveChar.getStrippedValue().getBytes(); + colVector.setVal(batchIndex, bytes, 0, bytes.length); + } + } + } + + private class DecimalAssigner extends Assigner { + + protected DecimalColumnVector colVector; + + DecimalAssigner(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (DecimalColumnVector) batch.cols[columnIndex]; + } + + @Override + void forgetColumnVector() { + colVector = null; + } + + @Override + void assign(int batchIndex, Object object) { + if (object == null) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + if (object instanceof HiveDecimal) { + colVector.set(batchIndex, (HiveDecimal) object); + } else { + colVector.set(batchIndex, (HiveDecimalWritable) object); + } + } + } + } + + private Assigner createAssigner(PrimitiveTypeInfo primitiveTypeInfo, int columnIndex) throws HiveException { + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + Assigner assigner; + switch (primitiveCategory) { + case VOID: + assigner = new VoidAssigner(columnIndex); + break; + case BOOLEAN: + assigner = new BooleanAssigner(columnIndex); + break; + case BYTE: + assigner = new ByteAssigner(columnIndex); + break; + case SHORT: + assigner = new ShortAssigner(columnIndex); + break; + case INT: + assigner = new IntAssigner(columnIndex); + break; + case LONG: + assigner = new LongAssigner(columnIndex); + break; + case TIMESTAMP: + assigner = new TimestampAssigner(columnIndex); + break; + case DATE: + assigner = new DateAssigner(columnIndex); + break; + case FLOAT: + assigner = new FloatAssigner(columnIndex); + break; + case DOUBLE: + assigner = new DoubleAssigner(columnIndex); + break; + case BINARY: + assigner = new BinaryAssigner(columnIndex); + break; + case STRING: + assigner = new StringAssigner(columnIndex); + break; + case VARCHAR: + assigner = new VarCharAssigner(columnIndex); + break; + case CHAR: + assigner = new CharAssigner(columnIndex); + break; + case DECIMAL: + assigner = new DecimalAssigner(columnIndex); + break; + case INTERVAL_YEAR_MONTH: + assigner = new IntervalYearMonthAssigner(columnIndex); + break; + case INTERVAL_DAY_TIME: + assigner = new IntervalDayTimeAssigner(columnIndex); + break; + default: + throw new HiveException("No vector row assigner for primitive category " + + primitiveCategory); + } + return assigner; + } + + Assigner[] assigners; + + public void init(StructObjectInspector structObjectInspector, List projectedColumns) throws HiveException { + + List fields = structObjectInspector.getAllStructFieldRefs(); + assigners = new Assigner[fields.size()]; + + int i = 0; + for 
(StructField field : fields) { + int columnIndex = projectedColumns.get(i); + ObjectInspector fieldInspector = field.getFieldObjectInspector(); + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString( + fieldInspector.getTypeName()); + assigners[i] = createAssigner(primitiveTypeInfo, columnIndex); + i++; + } + } + + public void init(List typeNames) throws HiveException { + + assigners = new Assigner[typeNames.size()]; + + int i = 0; + for (String typeName : typeNames) { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + assigners[i] = createAssigner(primitiveTypeInfo, i); + i++; + } + } + + protected void setBatch(VectorizedRowBatch batch) throws HiveException { + for (int i = 0; i < assigners.length; i++) { + Assigner assigner = assigners[i]; + int columnIndex = assigner.getColumnIndex(); + if (batch.cols[columnIndex] == null) { + throw new HiveException("Unexpected null vector column " + columnIndex); + } + assigner.setColumnVector(batch); + } + } + + protected void forgetBatch() { + for (Assigner assigner : assigners) { + assigner.forgetColumnVector(); + } + } + + public void assignRowColumn(int batchIndex, int logicalColumnIndex, Object object) { + assigners[logicalColumnIndex].assign(batchIndex, object); + } + + public void assignRow(int batchIndex, Object[] objects) { + int i = 0; + for (Assigner assigner : assigners) { + assigner.assign(batchIndex, objects[i++]); + } + } + +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java (working copy) @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * This class assigns specified columns of a VectorizedRowBatch row from a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * assign from the Writable row Object[]. + * + * This class is for use when the batch being assigned may change each time before processOp + * is called. 
+ */ +public class VectorAssignRowDynBatch extends VectorAssignRow { + + public void setBatchOnEntry(VectorizedRowBatch batch) throws HiveException { + setBatch(batch); + } + + public void forgetBatchOnExit() { + forgetBatch(); + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java (working copy) @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * This class assigns specified columns of a VectorizedRowBatch row from a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * assign from the Writable row Object[]. + * + * This class is for use when the batch being assigned is always the same. 
+ */ +public class VectorAssignRowSameBatch extends VectorAssignRow { + + public void setOneBatch(VectorizedRowBatch batch) throws HiveException { + setBatch(batch); + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java (working copy) @@ -573,6 +573,12 @@ vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.INTERVAL_DAY_TIME); } else if (writables[i] instanceof BooleanWritable) { vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BOOLEAN); + } else if (writables[i] instanceof HiveDecimalWritable) { + vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.DECIMAL); + } else if (writables[i] instanceof HiveCharWritable) { + vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.CHAR); + } else if (writables[i] instanceof HiveVarcharWritable) { + vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.VARCHAR); } else { throw new HiveException("Unimplemented vector assigner for writable type " + writables[i].getClass()); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnMapping.java (working copy) @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping; + +/** + * This class collects column information for copying a row from one VectorizedRowBatch to + * same/another batch. 
+ */ +public abstract class VectorColumnMapping { + + private static final long serialVersionUID = 1L; + + protected int[] sourceColumns; + protected int[] outputColumns; + protected String[] typeNames; + + protected VectorColumnOrderedMap vectorColumnMapping; + + public VectorColumnMapping() { + this.vectorColumnMapping = new VectorColumnOrderedMap(); + } + + public abstract void add(int sourceColumn, int outputColumn, String typeName); + + public abstract void finalize(); + + public int getCount() { + return sourceColumns.length; + } + + public int[] getInputColumns() { + return sourceColumns; + } + + public int[] getOutputColumns() { + return outputColumns; + } + + public String[] getTypeNames() { + return typeNames; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("source columns: " + Arrays.toString(sourceColumns)); + sb.append(", "); + sb.append("output columns: " + Arrays.toString(outputColumns)); + sb.append(", "); + sb.append("type names: " + Arrays.toString(typeNames)); + return sb.toString(); + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOrderedMap.java (working copy) @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.ArrayList; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * This class collects column information for mapping vector columns, including the hive type name. + * + * The column information are kept ordered by a specified column. + * + * Call getMapping to collects the results into convenient arrays. 
+ */ +public class VectorColumnOrderedMap { + protected static transient final Log LOG = LogFactory.getLog(VectorColumnOrderedMap.class); + + private TreeMap orderedTreeMap; + + private class Value { + int valueColumn; + + String typeName; + + Value(int valueColumn, String typeName) { + this.valueColumn = valueColumn; + this.typeName = typeName; + } + } + + public class Mapping { + + private final int[] orderedColumns; + private final int[] valueColumns; + private final String[] typeNames; + + Mapping(int[] orderedColumns, int[] valueColumns, String[] typeNames) { + this.orderedColumns = orderedColumns; + this.valueColumns = valueColumns; + this.typeNames = typeNames; + } + + public int getCount() { + return orderedColumns.length; + } + + public int[] getOrderedColumns() { + return orderedColumns; + } + + public int[] getValueColumns() { + return valueColumns; + } + + public String[] getTypeNames() { + return typeNames; + } + } + + public VectorColumnOrderedMap() { + orderedTreeMap = new TreeMap(); + } + + public void add(int orderedColumn, int valueColumn, String typeName) { + if (orderedTreeMap.containsKey(orderedColumn)) { + throw new Error("Duplicate column " + orderedColumn + " in ordered column map"); + } + orderedTreeMap.put(orderedColumn, new Value(valueColumn, typeName)); + } + + public boolean orderedColumnsContain(int orderedColumn) { + return orderedTreeMap.containsKey(orderedColumn); + } + + public Mapping getMapping() { + ArrayList orderedColumns = new ArrayList(); + ArrayList valueColumns = new ArrayList(); + ArrayList typeNames = new ArrayList(); + for (Map.Entry entry : orderedTreeMap.entrySet()) { + orderedColumns.add(entry.getKey()); + Value value = entry.getValue(); + valueColumns.add(value.valueColumn); + typeNames.add(value.typeName); + } + return new Mapping( + ArrayUtils.toPrimitive(orderedColumns.toArray(new Integer[0])), + ArrayUtils.toPrimitive(valueColumns.toArray(new Integer[0])), + typeNames.toArray(new String[0])); + + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnOutputMapping.java (working copy) @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping; + +/** + * This class collects column information for copying a row from one VectorizedRowBatch to + * same/another batch. + * + * In this variation, column information is ordered by the output column number. 
+ */ +public class VectorColumnOutputMapping extends VectorColumnMapping { + + private static final long serialVersionUID = 1L; + + @Override + public void add(int sourceColumn, int outputColumn, String typeName) { + // Order on outputColumn. + vectorColumnMapping.add(outputColumn, sourceColumn, typeName); + } + + public boolean containsOutputColumn(int outputColumn) { + return vectorColumnMapping.orderedColumnsContain(outputColumn); + } + + @Override + public void finalize() { + Mapping mapping = vectorColumnMapping.getMapping(); + + // Ordered columns are the output columns. + sourceColumns = mapping.getValueColumns(); + outputColumns = mapping.getOrderedColumns(); + typeNames = mapping.getTypeNames(); + + // Not needed anymore. + vectorColumnMapping = null; + } + +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java (working copy) @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping; + +/** + * This class collects column information for copying a row from one VectorizedRowBatch to + * same/another batch. + * + * In this variation, column information is ordered by the source column number. + */ +public class VectorColumnSourceMapping extends VectorColumnMapping { + + private static final long serialVersionUID = 1L; + + @Override + public void add(int sourceColumn, int outputColumn, String typeName) { + // Order on sourceColumn. + vectorColumnMapping.add(sourceColumn, outputColumn, typeName); + } + + @Override + public void finalize() { + Mapping mapping = vectorColumnMapping.getMapping(); + + // Ordered columns are the source columns. + sourceColumns = mapping.getOrderedColumns(); + outputColumns = mapping.getValueColumns(); + typeNames = mapping.getTypeNames(); + + // Not needed anymore. 
+ vectorColumnMapping = null; + } + + public boolean isSourceSequenceGood() { + int count = sourceColumns.length; + for (int i = 0; i < count; i++) { + if (sourceColumns[i] != i) { + return false; + } + } + return true; + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java (working copy) @@ -0,0 +1,246 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * This class copies specified columns of a row from one VectorizedRowBatch to another. + */ +public class VectorCopyRow { + + protected static transient final Log LOG = LogFactory.getLog(VectorCopyRow.class); + + private abstract class CopyRow { + protected int inColumnIndex; + protected int outColumnIndex; + + CopyRow(int inColumnIndex, int outColumnIndex) { + this.inColumnIndex = inColumnIndex; + this.outColumnIndex = outColumnIndex; + } + + abstract void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex); + } + + private class LongCopyRow extends CopyRow { + + LongCopyRow(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + @Override + void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + LongColumnVector inColVector = (LongColumnVector) inBatch.cols[inColumnIndex]; + LongColumnVector outColVector = (LongColumnVector) outBatch.cols[outColumnIndex]; + + if (inColVector.isRepeating) { + if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.vector[outBatchIndex] = inColVector.vector[0]; + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } else { + if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.vector[outBatchIndex] = inColVector.vector[inBatchIndex]; + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } + } + } + + private class DoubleCopyRow extends CopyRow { + + DoubleCopyRow(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + @Override + void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + DoubleColumnVector inColVector = (DoubleColumnVector) inBatch.cols[inColumnIndex]; + DoubleColumnVector outColVector = (DoubleColumnVector) outBatch.cols[outColumnIndex]; + + if (inColVector.isRepeating) { + if 
(inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.vector[outBatchIndex] = inColVector.vector[0]; + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } else { + if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.vector[outBatchIndex] = inColVector.vector[inBatchIndex]; + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } + } + } + + private abstract class AbstractBytesCopyRow extends CopyRow { + + AbstractBytesCopyRow(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + } + + private class BytesCopyRowByValue extends AbstractBytesCopyRow { + + BytesCopyRowByValue(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + @Override + void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + BytesColumnVector inColVector = (BytesColumnVector) inBatch.cols[inColumnIndex]; + BytesColumnVector outColVector = (BytesColumnVector) outBatch.cols[outColumnIndex]; + + if (inColVector.isRepeating) { + if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.setVal(outBatchIndex, inColVector.vector[0], inColVector.start[0], inColVector.length[0]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } else { + if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.setVal(outBatchIndex, inColVector.vector[inBatchIndex], inColVector.start[inBatchIndex], inColVector.length[inBatchIndex]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } + } + } + + private class BytesCopyRowByReference extends AbstractBytesCopyRow { + + BytesCopyRowByReference(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + @Override + void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + BytesColumnVector inColVector = (BytesColumnVector) inBatch.cols[inColumnIndex]; + BytesColumnVector outColVector = (BytesColumnVector) outBatch.cols[outColumnIndex]; + + if (inColVector.isRepeating) { + if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.setRef(outBatchIndex, inColVector.vector[0], inColVector.start[0], inColVector.length[0]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } else { + if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.setRef(outBatchIndex, inColVector.vector[inBatchIndex], inColVector.start[inBatchIndex], inColVector.length[inBatchIndex]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } + } + } + + private class DecimalCopyRow extends CopyRow { + + DecimalCopyRow(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + @Override + void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + DecimalColumnVector inColVector = (DecimalColumnVector) inBatch.cols[inColumnIndex]; + DecimalColumnVector outColVector = (DecimalColumnVector) outBatch.cols[outColumnIndex]; + + if (inColVector.isRepeating) { + if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.set(outBatchIndex, inColVector.vector[0]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } else { + if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + 
outColVector.set(outBatchIndex, inColVector.vector[inBatchIndex]); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } + } + } + + private CopyRow[] subRowToBatchCopiersByValue; + private CopyRow[] subRowToBatchCopiersByReference; + + public void init(VectorColumnMapping columnMapping) { + int count = columnMapping.getCount(); + subRowToBatchCopiersByValue = new CopyRow[count]; + subRowToBatchCopiersByReference = new CopyRow[count]; + + for (int i = 0; i < count; i++) { + int inputColumn = columnMapping.getInputColumns()[i]; + int outputColumn = columnMapping.getOutputColumns()[i]; + String typeName = columnMapping.getTypeNames()[i]; + + CopyRow copyRowByValue = null; + CopyRow copyRowByReference = null; + + if (VectorizationContext.isIntFamily(typeName) || + VectorizationContext.isDatetimeFamily(typeName)) { + copyRowByValue = new LongCopyRow(inputColumn, outputColumn); + } else if (VectorizationContext.isFloatFamily(typeName)) { + copyRowByValue = new DoubleCopyRow(inputColumn, outputColumn); + } else if (VectorizationContext.isStringFamily(typeName)) { + copyRowByValue = new BytesCopyRowByValue(inputColumn, outputColumn); + copyRowByReference = new BytesCopyRowByReference(inputColumn, outputColumn); + } else if (VectorizationContext.decimalTypePattern.matcher(typeName).matches()){ + copyRowByValue = new DecimalCopyRow(inputColumn, outputColumn); + } else { + throw new Error("Cannot allocate vector copy row for " + typeName); + } + subRowToBatchCopiersByValue[i] = copyRowByValue; + if (copyRowByReference == null) { + subRowToBatchCopiersByReference[i] = copyRowByValue; + } else { + subRowToBatchCopiersByReference[i] = copyRowByReference; + } + } + } + + /* + * Use this copy method when the source batch may get reused before the target batch is finished. + * Any bytes column vector values will be copied to the target by value into the column's + * data buffer. + */ + public void copyByValue(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + for (CopyRow copyRow : subRowToBatchCopiersByValue) { + copyRow.copy(inBatch, inBatchIndex, outBatch, outBatchIndex); + } + } + + /* + * Use this copy method when the source batch is safe and will remain around until the target + * batch is finished. + * + * Any bytes column vector values will be referenced by the target column instead of copying. + */ + public void copyByReference(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + for (CopyRow copyRow : subRowToBatchCopiersByReference) { + copyRow.copy(inBatch, inBatchIndex, outBatch, outBatchIndex); + } + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java (working copy) @@ -0,0 +1,711 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
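For illustration (a sketch, not taken from the patch; the inBatch, outBatch, inRowIndex and outRowIndex variables as well as the column numbers and type names are assumed), a copier is built from a column mapping and then applied row by row roughly like this:

    VectorColumnOutputMapping mapping = new VectorColumnOutputMapping();
    mapping.add(0, 2, "bigint");   // source column 0 -> output column 2
    mapping.add(1, 3, "string");   // source column 1 -> output column 3
    mapping.finalize();            // resolves the ordered map into the flat arrays

    VectorCopyRow copyRow = new VectorCopyRow();
    copyRow.init(mapping);

    // Copy by value when inBatch may be recycled before outBatch is consumed;
    // copyByReference avoids the byte copy but shares string data with the source batch.
    copyRow.copyByValue(inBatch, inRowIndex, outBatch, outRowIndex);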
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.EOFException; +import java.io.IOException; +import java.sql.Timestamp; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hive.common.util.DateUtils; + +/** + * This class deserializes a serialization format into a row of a VectorizedRowBatch. + * + * The caller provides the hive type names and output column numbers in the order desired to + * deserialize. + * + * This class uses an provided DeserializeRead object to directly deserialize by reading + * field-by-field from a serialization format into the primitive values of the VectorizedRowBatch. + */ + +public class VectorDeserializeRow { + private static final long serialVersionUID = 1L; + private static final Log LOG = LogFactory.getLog(VectorDeserializeRow.class); + + private DeserializeRead deserializeRead; + + private Reader[] readersByValue; + private Reader[] readersByReference; + private PrimitiveTypeInfo[] primitiveTypeInfos; + + public VectorDeserializeRow(DeserializeRead deserializeRead) { + this(); + this.deserializeRead = deserializeRead; + primitiveTypeInfos = deserializeRead.primitiveTypeInfos(); + + } + + // Not public since we must have the deserialize read object. 
+ private VectorDeserializeRow() { + } + + private abstract class Reader { + protected int columnIndex; + + Reader(int columnIndex) { + this.columnIndex = columnIndex; + } + + abstract void apply(VectorizedRowBatch batch, int batchIndex) throws IOException; + } + + private abstract class AbstractLongReader extends Reader { + + AbstractLongReader(int columnIndex) { + super(columnIndex); + } + } + + private class BooleanReader extends AbstractLongReader { + + BooleanReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + boolean value = deserializeRead.readBoolean(); + colVector.vector[batchIndex] = (value ? 1 : 0); + } + } + } + + private class ByteReader extends AbstractLongReader { + + ByteReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + byte value = deserializeRead.readByte(); + colVector.vector[batchIndex] = (long) value; + } + } + } + + private class ShortReader extends AbstractLongReader { + + ShortReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + short value = deserializeRead.readShort(); + colVector.vector[batchIndex] = (long) value; + } + } + } + + private class IntReader extends AbstractLongReader { + + IntReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + int value = deserializeRead.readInt(); + colVector.vector[batchIndex] = (long) value; + } + } + } + + private class LongReader extends AbstractLongReader { + + LongReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + long value = deserializeRead.readLong(); + colVector.vector[batchIndex] = value; + } + } + } + + private class DateReader extends AbstractLongReader { + + DeserializeRead.ReadDateResults readDateResults; + + DateReader(int columnIndex) { + super(columnIndex); + readDateResults = deserializeRead.createReadDateResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readDate(readDateResults); + colVector.vector[batchIndex] = (long) 
readDateResults.getDays(); + } + } + } + + private class TimestampReader extends AbstractLongReader { + + DeserializeRead.ReadTimestampResults readTimestampResults; + + TimestampReader(int columnIndex) { + super(columnIndex); + readTimestampResults = deserializeRead.createReadTimestampResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readTimestamp(readTimestampResults); + Timestamp t = readTimestampResults.getTimestamp(); + colVector.vector[batchIndex] = TimestampUtils.getTimeNanoSec(t); + } + } + } + + private class IntervalYearMonthReader extends AbstractLongReader { + + DeserializeRead.ReadIntervalYearMonthResults readIntervalYearMonthResults; + + IntervalYearMonthReader(int columnIndex) { + super(columnIndex); + readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readIntervalYearMonth(readIntervalYearMonthResults); + HiveIntervalYearMonth hiym = readIntervalYearMonthResults.getHiveIntervalYearMonth(); + colVector.vector[batchIndex] = hiym.getTotalMonths(); + } + } + } + + private class IntervalDayTimeReader extends AbstractLongReader { + + DeserializeRead.ReadIntervalDayTimeResults readIntervalDayTimeResults; + + IntervalDayTimeReader(int columnIndex) { + super(columnIndex); + readIntervalDayTimeResults = deserializeRead.createReadIntervalDayTimeResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readIntervalDayTime(readIntervalDayTimeResults); + HiveIntervalDayTime hidt = readIntervalDayTimeResults.getHiveIntervalDayTime(); + colVector.vector[batchIndex] = DateUtils.getIntervalDayTimeTotalNanos(hidt); + } + } + } + + private abstract class AbstractDoubleReader extends Reader { + + AbstractDoubleReader(int columnIndex) { + super(columnIndex); + } + } + + private class FloatReader extends AbstractDoubleReader { + + FloatReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + float value = deserializeRead.readFloat(); + colVector.vector[batchIndex] = (double) value; + } + } + } + + private class DoubleReader extends AbstractDoubleReader { + + DoubleReader(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + double value = deserializeRead.readDouble(); + 
colVector.vector[batchIndex] = value; + } + } + } + + private abstract class AbstractBytesReader extends Reader { + + AbstractBytesReader(int columnIndex) { + super(columnIndex); + } + } + + private class StringReaderByValue extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + StringReaderByValue(int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readString(readStringResults); + colVector.setVal(batchIndex, readStringResults.bytes, + readStringResults.start, readStringResults.length); + } + } + } + + private class StringReaderByReference extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + StringReaderByReference(int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readString(readStringResults); + colVector.setRef(batchIndex, readStringResults.bytes, + readStringResults.start, readStringResults.length); + } + } + } + + private class CharReaderByValue extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + private CharTypeInfo charTypeInfo; + + CharReaderByValue(CharTypeInfo charTypeInfo, int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + this.charTypeInfo = charTypeInfo; + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. 
+ deserializeRead.readString(readStringResults); + int adjustedLength = StringExpr.rightTrimAndTruncate(readStringResults.bytes, + readStringResults.start, readStringResults.length, charTypeInfo.getLength()); + colVector.setVal(batchIndex, readStringResults.bytes, readStringResults.start, adjustedLength); + } + } + } + + private class CharReaderByReference extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + private CharTypeInfo charTypeInfo; + + CharReaderByReference(CharTypeInfo charTypeInfo, int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + this.charTypeInfo = charTypeInfo; + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. + deserializeRead.readString(readStringResults); + int adjustedLength = StringExpr.rightTrimAndTruncate(readStringResults.bytes, + readStringResults.start, readStringResults.length, charTypeInfo.getLength()); + colVector.setRef(batchIndex, readStringResults.bytes, readStringResults.start, adjustedLength); + } + } + } + + private class VarcharReaderByValue extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + private VarcharTypeInfo varcharTypeInfo; + + VarcharReaderByValue(VarcharTypeInfo varcharTypeInfo, int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + this.varcharTypeInfo = varcharTypeInfo; + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. + deserializeRead.readString(readStringResults); + int adjustedLength = StringExpr.truncate(readStringResults.bytes, + readStringResults.start, readStringResults.length, varcharTypeInfo.getLength()); + colVector.setVal(batchIndex, readStringResults.bytes, readStringResults.start, adjustedLength); + } + } + } + + private class VarcharReaderByReference extends AbstractBytesReader { + + private DeserializeRead.ReadStringResults readStringResults; + + private VarcharTypeInfo varcharTypeInfo; + + VarcharReaderByReference(VarcharTypeInfo varcharTypeInfo, int columnIndex) { + super(columnIndex); + readStringResults = deserializeRead.createReadStringResults(); + this.varcharTypeInfo = varcharTypeInfo; + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. 
+ deserializeRead.readString(readStringResults); + int adjustedLength = StringExpr.truncate(readStringResults.bytes, + readStringResults.start, readStringResults.length, varcharTypeInfo.getLength()); + colVector.setRef(batchIndex, readStringResults.bytes, readStringResults.start, adjustedLength); + } + } + } + + private class BinaryReaderByValue extends AbstractBytesReader { + + private DeserializeRead.ReadBinaryResults readBinaryResults; + + BinaryReaderByValue(int columnIndex) { + super(columnIndex); + readBinaryResults = deserializeRead.createReadBinaryResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readBinary(readBinaryResults); + colVector.setVal(batchIndex, readBinaryResults.bytes, + readBinaryResults.start, readBinaryResults.length); + } + } + } + + private class BinaryReaderByReference extends AbstractBytesReader { + + private DeserializeRead.ReadBinaryResults readBinaryResults; + + BinaryReaderByReference(int columnIndex) { + super(columnIndex); + readBinaryResults = deserializeRead.createReadBinaryResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readBinary(readBinaryResults); + colVector.setRef(batchIndex, readBinaryResults.bytes, + readBinaryResults.start, readBinaryResults.length); + } + } + } + + private class HiveDecimalReader extends Reader { + + private DeserializeRead.ReadDecimalResults readDecimalResults; + + HiveDecimalReader(int columnIndex) { + super(columnIndex); + readDecimalResults = deserializeRead.createReadDecimalResults(); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DecimalColumnVector colVector = (DecimalColumnVector) batch.cols[columnIndex]; + + if (deserializeRead.readCheckNull()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + } else { + deserializeRead.readHiveDecimal(readDecimalResults); + HiveDecimal hiveDecimal = readDecimalResults.getHiveDecimal(); + colVector.vector[batchIndex].set(hiveDecimal); + } + } + } + + private void addReader(int index, int outputColumn) throws HiveException { + Reader readerByValue = null; + Reader readerByReference = null; + + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + switch (primitiveCategory) { + // case VOID: + // UNDONE: + // break; + case BOOLEAN: + readerByValue = new BooleanReader(outputColumn); + break; + case BYTE: + readerByValue = new ByteReader(outputColumn); + break; + case SHORT: + readerByValue = new ShortReader(outputColumn); + break; + case INT: + readerByValue = new IntReader(outputColumn); + break; + case LONG: + readerByValue = new LongReader(outputColumn); + break; + case DATE: + readerByValue = new DateReader(outputColumn); + break; + case TIMESTAMP: + readerByValue = new TimestampReader(outputColumn); + break; + case FLOAT: + readerByValue = new FloatReader(outputColumn); + break; + case DOUBLE: + readerByValue = new DoubleReader(outputColumn); + break; + case STRING: + readerByValue = 
new StringReaderByValue(outputColumn); + readerByReference = new StringReaderByReference(outputColumn); + break; + case CHAR: + { + CharTypeInfo charTypeInfo = (CharTypeInfo) primitiveTypeInfo; + readerByValue = new CharReaderByValue(charTypeInfo, outputColumn); + readerByReference = new CharReaderByReference(charTypeInfo, outputColumn); + } + break; + case VARCHAR: + { + VarcharTypeInfo varcharTypeInfo = (VarcharTypeInfo) primitiveTypeInfo; + readerByValue = new VarcharReaderByValue(varcharTypeInfo, outputColumn); + readerByReference = new VarcharReaderByReference(varcharTypeInfo, outputColumn); + } + break; + case BINARY: + readerByValue = new BinaryReaderByValue(outputColumn); + readerByReference = new BinaryReaderByReference(outputColumn); + break; + case DECIMAL: + readerByValue = new HiveDecimalReader(outputColumn); + break; + case INTERVAL_YEAR_MONTH: + readerByValue = new IntervalYearMonthReader(outputColumn); + break; + case INTERVAL_DAY_TIME: + readerByValue = new IntervalDayTimeReader(outputColumn); + break; + default: + throw new HiveException("Unexpected primitive type category " + primitiveCategory); + } + + readersByValue[index] = readerByValue; + if (readerByReference == null) { + readersByReference[index] = readerByValue; + } else { + readersByReference[index] = readerByReference; + } + } + + public void init(int[] outputColumns) throws HiveException { + + readersByValue = new Reader[primitiveTypeInfos.length]; + readersByReference = new Reader[primitiveTypeInfos.length]; + + for (int i = 0; i < primitiveTypeInfos.length; i++) { + int outputColumn = outputColumns[i]; + addReader(i, outputColumn); + } + } + + public void init(int startColumn) throws HiveException { + + readersByValue = new Reader[primitiveTypeInfos.length]; + readersByReference = new Reader[primitiveTypeInfos.length]; + + for (int i = 0; i < primitiveTypeInfos.length; i++) { + int outputColumn = startColumn + i; + addReader(i, outputColumn); + } + } + + public void init() throws HiveException { + init(0); + } + + public void setBytes(byte[] bytes, int offset, int length) { + deserializeRead.set(bytes, offset, length); + } + + public void deserializeByValue(VectorizedRowBatch batch, int batchIndex) throws IOException { + int i = 0; + try { + while (i < readersByValue.length) { + readersByValue[i].apply(batch, batchIndex); + i++; // Increment after the apply which could throw an exception. + } + } catch (EOFException e) { + throwMoreDetailedException(e, i); + } + deserializeRead.extraFieldsCheck(); + } + + public void deserializeByReference(VectorizedRowBatch batch, int batchIndex) throws IOException { + int i = 0; + try { + while (i < readersByReference.length) { + readersByReference[i].apply(batch, batchIndex); + i++; // Increment after the apply which could throw an exception. 
+ } + } catch (EOFException e) { + throwMoreDetailedException(e, i); + } + deserializeRead.extraFieldsCheck(); + } + + private void throwMoreDetailedException(IOException e, int index) throws EOFException { + StringBuilder sb = new StringBuilder(); + sb.append("Detail: \"" + e.toString() + "\" occured for field " + index + " of " + primitiveTypeInfos.length + " fields ("); + for (int i = 0; i < primitiveTypeInfos.length; i++) { + if (i > 0) { + sb.append(", "); + } + sb.append(primitiveTypeInfos[i].getPrimitiveCategory().name()); + } + sb.append(")"); + throw new EOFException(sb.toString()); + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java (working copy) @@ -0,0 +1,735 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
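For illustration (a sketch, not taken from the patch; deserializeRead, rowBytes and batch are assumed to exist, with deserializeRead being some concrete DeserializeRead implementation already configured with the row's primitive type infos, and exception handling omitted), the deserialize side above is driven roughly like this:

    VectorDeserializeRow vectorDeserializeRow = new VectorDeserializeRow(deserializeRead);
    vectorDeserializeRow.init(0);   // field i is written into batch column i

    vectorDeserializeRow.setBytes(rowBytes, 0, rowBytes.length);
    vectorDeserializeRow.deserializeByValue(batch, batch.size);
    batch.size++;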
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.List; +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateUtils; + +/** + * This class extracts specified VectorizedRowBatch row columns into a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * extract from the Writable row Object[]. + * + * This class is abstract to allow the subclasses to control batch reuse. + */ +public abstract class VectorExtractRow { + private static final long serialVersionUID = 1L; + private static final Log LOG = LogFactory.getLog(VectorExtractRow.class); + + private boolean tolerateNullColumns; + + public VectorExtractRow() { + // UNDONE: For now allow null columns until vector_decimal_mapjoin.q is understood... 
+ tolerateNullColumns = true; + } + + protected abstract class Extractor { + protected int columnIndex; + protected Object object; + + public Extractor(int columnIndex) { + this.columnIndex = columnIndex; + } + + public int getColumnIndex() { + return columnIndex; + } + + abstract void setColumnVector(VectorizedRowBatch batch); + + abstract void forgetColumnVector(); + + abstract Object extract(int batchIndex); + } + + private class VoidExtractor extends Extractor { + + VoidExtractor(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + } + + @Override + void forgetColumnVector() { + } + + @Override + Object extract(int batchIndex) { + return null; + } + } + + private abstract class AbstractLongExtractor extends Extractor { + + protected LongColumnVector colVector; + protected long[] vector; + + AbstractLongExtractor(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (LongColumnVector) batch.cols[columnIndex]; + vector = colVector.vector; + } + + @Override + void forgetColumnVector() { + colVector = null; + vector = null; + } + } + + protected class BooleanExtractor extends AbstractLongExtractor { + + BooleanExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector.create(false); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableBooleanObjectInspector.set(object, value == 0 ? false : true); + return object; + } else { + return null; + } + } + } + + protected class ByteExtractor extends AbstractLongExtractor { + + ByteExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableByteObjectInspector.create((byte) 0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableByteObjectInspector.set(object, (byte) value); + return object; + } else { + return null; + } + } + } + + private class ShortExtractor extends AbstractLongExtractor { + + ShortExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableShortObjectInspector.create((short) 0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableShortObjectInspector.set(object, (short) value); + return object; + } else { + return null; + } + } + } + + private class IntExtractor extends AbstractLongExtractor { + + IntExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableIntObjectInspector.create(0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 
0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableIntObjectInspector.set(object, (int) value); + return object; + } else { + return null; + } + } + } + + private class LongExtractor extends AbstractLongExtractor { + + LongExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableLongObjectInspector.create(0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableLongObjectInspector.set(object, value); + return object; + } else { + return null; + } + } + } + + private class DateExtractor extends AbstractLongExtractor { + + private Date date; + + DateExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableDateObjectInspector.create(new Date(0)); + date = new Date(0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + date.setTime(DateWritable.daysToMillis((int) value)); + PrimitiveObjectInspectorFactory.writableDateObjectInspector.set(object, date); + return object; + } else { + return null; + } + } + } + + private class TimestampExtractor extends AbstractLongExtractor { + + private Timestamp timestamp; + + TimestampExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.create(new Timestamp(0)); + timestamp = new Timestamp(0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + TimestampUtils.assignTimeInNanoSec(value, timestamp); + PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.set(object, timestamp); + return object; + } else { + return null; + } + } + } + + private class IntervalYearMonthExtractor extends AbstractLongExtractor { + + private HiveIntervalYearMonth hiveIntervalYearMonth; + + IntervalYearMonthExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.create(new HiveIntervalYearMonth(0)); + hiveIntervalYearMonth = new HiveIntervalYearMonth(0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + int totalMonths = (int) vector[adjustedIndex]; + hiveIntervalYearMonth.set(totalMonths); + PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.set(object, hiveIntervalYearMonth); + return object; + } else { + return null; + } + } + } + + private class IntervalDayTimeExtractor extends AbstractLongExtractor { + + private HiveIntervalDayTime hiveIntervalDayTime; + + IntervalDayTimeExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.create(new HiveIntervalDayTime(0, 0)); + hiveIntervalDayTime = new HiveIntervalDayTime(0, 0); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 
0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + long value = vector[adjustedIndex]; + DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, value); + PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.set(object, hiveIntervalDayTime); + return object; + } else { + return null; + } + } + } + + private abstract class AbstractDoubleExtractor extends Extractor { + + protected DoubleColumnVector colVector; + protected double[] vector; + + AbstractDoubleExtractor(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (DoubleColumnVector) batch.cols[columnIndex]; + vector = colVector.vector; + } + + @Override + void forgetColumnVector() { + colVector = null; + vector = null; + } + } + + private class FloatExtractor extends AbstractDoubleExtractor { + + FloatExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableFloatObjectInspector.create(0f); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + double value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableFloatObjectInspector.set(object, (float) value); + return object; + } else { + return null; + } + } + } + + private class DoubleExtractor extends AbstractDoubleExtractor { + + DoubleExtractor(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.create(0f); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + double value = vector[adjustedIndex]; + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.set(object, value); + return object; + } else { + return null; + } + } + } + + private abstract class AbstractBytesExtractor extends Extractor { + + protected BytesColumnVector colVector; + + AbstractBytesExtractor(int columnIndex) { + super(columnIndex); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (BytesColumnVector) batch.cols[columnIndex]; + } + + @Override + void forgetColumnVector() { + colVector = null; + } + } + + private class BinaryExtractorByValue extends AbstractBytesExtractor { + + private DataOutputBuffer buffer; + + // Use the BytesWritable instance here as a reference to data saved in buffer. We do not + // want to pass the binary object inspector a byte[] since we would need to allocate it on the + // heap each time to get the length correct. + private BytesWritable bytesWritable; + + BinaryExtractorByValue(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY); + buffer = new DataOutputBuffer(); + bytesWritable = new BytesWritable(); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + byte[] bytes = colVector.vector[adjustedIndex]; + int start = colVector.start[adjustedIndex]; + int length = colVector.length[adjustedIndex]; + + // Save a copy of the binary data. 
+ buffer.reset(); + try { + buffer.write(bytes, start, length); + } catch (IOException ioe) { + throw new IllegalStateException("bad write", ioe); + } + + bytesWritable.set(buffer.getData(), 0, buffer.getLength()); + PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.set(object, bytesWritable); + return object; + } else { + return null; + } + } + } + + private class StringExtractorByValue extends AbstractBytesExtractor { + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + private Text text; + + StringExtractorByValue(int columnIndex) { + super(columnIndex); + object = PrimitiveObjectInspectorFactory.writableStringObjectInspector.create(StringUtils.EMPTY); + text = new Text(); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + byte[] value = colVector.vector[adjustedIndex]; + int start = colVector.start[adjustedIndex]; + int length = colVector.length[adjustedIndex]; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + text.set(value, start, length); + + PrimitiveObjectInspectorFactory.writableStringObjectInspector.set(object, text); + return object; + } else { + return null; + } + } + } + + private class VarCharExtractorByValue extends AbstractBytesExtractor { + + // We need our own instance of the VARCHAR object inspector to hold the maximum length + // from the TypeInfo. + private WritableHiveVarcharObjectInspector writableVarcharObjectInspector; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + private Text text; + + /* + * @param varcharTypeInfo + * We need the VARCHAR type information that contains the maximum length. + * @param columnIndex + * The vector row batch column that contains the bytes for the VARCHAR. + */ + VarCharExtractorByValue(VarcharTypeInfo varcharTypeInfo, int columnIndex) { + super(columnIndex); + writableVarcharObjectInspector = new WritableHiveVarcharObjectInspector(varcharTypeInfo); + object = writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); + text = new Text(); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + byte[] value = colVector.vector[adjustedIndex]; + int start = colVector.start[adjustedIndex]; + int length = colVector.length[adjustedIndex]; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + text.set(value, start, length); + + writableVarcharObjectInspector.set(object, text.toString()); + return object; + } else { + return null; + } + } + } + + private class CharExtractorByValue extends AbstractBytesExtractor { + + // We need our own instance of the CHAR object inspector to hold the maximum length + // from the TypeInfo. + private WritableHiveCharObjectInspector writableCharObjectInspector; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + private Text text; + + /* + * @param varcharTypeInfo + * We need the CHAR type information that contains the maximum length. + * @param columnIndex + * The vector row batch column that contains the bytes for the CHAR. 
+ */ + CharExtractorByValue(CharTypeInfo charTypeInfo, int columnIndex) { + super(columnIndex); + writableCharObjectInspector = new WritableHiveCharObjectInspector(charTypeInfo); + object = writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); + text = new Text(); + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + byte[] value = colVector.vector[adjustedIndex]; + int start = colVector.start[adjustedIndex]; + int length = colVector.length[adjustedIndex]; + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + text.set(value, start, length); + + writableCharObjectInspector.set(object, text.toString()); + return object; + } else { + return null; + } + } + } + + private class DecimalExtractor extends Extractor { + + private WritableHiveDecimalObjectInspector writableDecimalObjectInspector; + protected DecimalColumnVector colVector; + + /* + * @param decimalTypeInfo + * We need the DECIMAL type information that contains scale and precision. + * @param columnIndex + * The vector row batch column that contains the bytes for the VARCHAR. + */ + DecimalExtractor(DecimalTypeInfo decimalTypeInfo, int columnIndex) { + super(columnIndex); + writableDecimalObjectInspector = new WritableHiveDecimalObjectInspector(decimalTypeInfo); + object = writableDecimalObjectInspector.create(HiveDecimal.ZERO); + } + + @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (DecimalColumnVector) batch.cols[columnIndex]; + } + + @Override + void forgetColumnVector() { + colVector = null; + } + + @Override + Object extract(int batchIndex) { + int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); + if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { + HiveDecimal value = colVector.vector[adjustedIndex].getHiveDecimal(); + writableDecimalObjectInspector.set(object, value); + return object; + } else { + return null; + } + } + } + + private Extractor createExtractor(PrimitiveTypeInfo primitiveTypeInfo, int columnIndex) throws HiveException { + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + Extractor extracter; + switch (primitiveCategory) { + case VOID: + extracter = new VoidExtractor(columnIndex); + break; + case BOOLEAN: + extracter = new BooleanExtractor(columnIndex); + break; + case BYTE: + extracter = new ByteExtractor(columnIndex); + break; + case SHORT: + extracter = new ShortExtractor(columnIndex); + break; + case INT: + extracter = new IntExtractor(columnIndex); + break; + case LONG: + extracter = new LongExtractor(columnIndex); + break; + case TIMESTAMP: + extracter = new TimestampExtractor(columnIndex); + break; + case DATE: + extracter = new DateExtractor(columnIndex); + break; + case FLOAT: + extracter = new FloatExtractor(columnIndex); + break; + case DOUBLE: + extracter = new DoubleExtractor(columnIndex); + break; + case BINARY: + extracter = new BinaryExtractorByValue(columnIndex); + break; + case STRING: + extracter = new StringExtractorByValue(columnIndex); + break; + case VARCHAR: + extracter = new VarCharExtractorByValue((VarcharTypeInfo) primitiveTypeInfo, columnIndex); + break; + case CHAR: + extracter = new CharExtractorByValue((CharTypeInfo) primitiveTypeInfo, columnIndex); + break; + case DECIMAL: + extracter = new DecimalExtractor((DecimalTypeInfo) primitiveTypeInfo, columnIndex); + break; + case INTERVAL_YEAR_MONTH: + extracter = new 
IntervalYearMonthExtractor(columnIndex); + break; + case INTERVAL_DAY_TIME: + extracter = new IntervalDayTimeExtractor(columnIndex); + break; + default: + throw new HiveException("No vector row extracter for primitive category " + + primitiveCategory); + } + return extracter; + } + + Extractor[] extracters; + + public void init(StructObjectInspector structObjectInspector, List projectedColumns) throws HiveException { + + extracters = new Extractor[projectedColumns.size()]; + + List fields = structObjectInspector.getAllStructFieldRefs(); + + int i = 0; + for (StructField field : fields) { + int columnIndex = projectedColumns.get(i); + ObjectInspector fieldInspector = field.getFieldObjectInspector(); + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString( + fieldInspector.getTypeName()); + extracters[i] = createExtractor(primitiveTypeInfo, columnIndex); + i++; + } + } + + public void init(List typeNames) throws HiveException { + + extracters = new Extractor[typeNames.size()]; + + int i = 0; + for (String typeName : typeNames) { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + extracters[i] = createExtractor(primitiveTypeInfo, i); + i++; + } + } + + public int getCount() { + return extracters.length; + } + + protected void setBatch(VectorizedRowBatch batch) throws HiveException { + + for (int i = 0; i < extracters.length; i++) { + Extractor extracter = extracters[i]; + int columnIndex = extracter.getColumnIndex(); + if (batch.cols[columnIndex] == null) { + if (tolerateNullColumns) { + // Replace with void... + extracter = new VoidExtractor(columnIndex); + extracters[i] = extracter; + } else { + throw new HiveException("Unexpected null vector column " + columnIndex); + } + } + extracter.setColumnVector(batch); + } + } + + protected void forgetBatch() { + for (Extractor extracter : extracters) { + extracter.forgetColumnVector(); + } + } + + public Object extractRowColumn(int batchIndex, int logicalColumnIndex) { + return extracters[logicalColumnIndex].extract(batchIndex); + } + + public void extractRow(int batchIndex, Object[] objects) { + int i = 0; + for (Extractor extracter : extracters) { + objects[i++] = extracter.extract(batchIndex); + } + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowDynBatch.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowDynBatch.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowDynBatch.java (working copy) @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * This class extracts specified VectorizedRowBatch row columns into a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * extract into the Writable row Object[]. + * + * This class is for use when the batch being assigned can change from call to call: the batch + * is set on entry and forgotten on exit. + */ +public class VectorExtractRowDynBatch extends VectorExtractRow { + + public void setBatchOnEntry(VectorizedRowBatch batch) throws HiveException { + setBatch(batch); + } + + public void forgetBatchOnExit() { + forgetBatch(); + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowSameBatch.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowSameBatch.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRowSameBatch.java (working copy) @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * This class extracts specified VectorizedRowBatch row columns into a Writable row Object[]. + * + * The caller provides the hive type names and target column numbers in the order desired to + * extract into the Writable row Object[]. + * + * This class is for use when the batch being assigned is always the same.
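+ *
+ * A possible caller, sketched here for illustration only: typeNames, batch and batchIndex are
+ * assumed to exist in the surrounding operator code, while init, setOneBatch, getCount and
+ * extractRow are the methods added by this patch.
+ *
+ *   VectorExtractRowSameBatch extractRow = new VectorExtractRowSameBatch();
+ *   extractRow.init(typeNames);                 // Hive type names, e.g. "int", "string"
+ *   extractRow.setOneBatch(batch);              // bind the single batch that will be reused
+ *   Object[] row = new Object[extractRow.getCount()];
+ *   extractRow.extractRow(batchIndex, row);     // fill the Writable objects for one row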
+ */ +public class VectorExtractRowSameBatch extends VectorExtractRow { + + public void setOneBatch(VectorizedRowBatch batch) throws HiveException { + setBatch(batch); + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java (working copy) @@ -766,8 +766,7 @@ isVectorOutput = desc.getVectorDesc().isVectorOutput(); - vOutContext = new VectorizationContext(desc.getOutputColumnNames()); - vOutContext.setFileKey(vContext.getFileKey() + "/_GROUPBY_"); + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); } public VectorGroupByOperator() { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java (working copy) @@ -120,8 +120,7 @@ bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); // We are making a new output vectorized row batch. - vOutContext = new VectorizationContext(desc.getOutputColumnNames()); - vOutContext.setFileKey(vContext.getFileKey() + "/MAP_JOIN_" + desc.getBigTableAlias()); + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); } @Override Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java (working copy) @@ -117,8 +117,7 @@ bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); // We are making a new output vectorized row batch. - vOutContext = new VectorizationContext(desc.getOutputColumnNames()); - vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias()); + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); } @Override Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java (working copy) @@ -68,11 +68,8 @@ * Create a new vectorization context to create a new projection, but keep * same output column manager must be inherited to track the scratch the columns. */ - vOutContext = new VectorizationContext(vContext); + vOutContext = new VectorizationContext(getName(), vContext); - // Set a fileKey, although this operator doesn't use it. 
- vOutContext.setFileKey(vContext.getFileKey() + "/_SELECT_"); - vOutContext.resetProjectionColumns(); for (int i=0; i < colList.size(); ++i) { String columnName = this.conf.getOutputColumnNames().get(i); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java (working copy) @@ -0,0 +1,626 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.IOException; +import java.sql.Timestamp; +import java.util.List; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +/** + * This class serializes columns from a row in a VectorizedRowBatch into a serialization format. + * + * The caller provides the hive type names and column numbers in the order desired to + * serialize. + * + * This class uses a provided SerializeWrite object to directly serialize by writing + * field-by-field into a serialization format from the primitive values of the VectorizedRowBatch. + * + * Note that when serializing a row, the logical mapping through the selected array (when + * selectedInUse is true) has already been applied. + */ +public class VectorSerializeRow { + + private SerializeWrite serializeWrite; + + public VectorSerializeRow(SerializeWrite serializeWrite) { + this(); + this.serializeWrite = serializeWrite; + } + + // Not public since we must have the serialize write object.
+ private VectorSerializeRow() { + } + + private abstract class Writer { + protected int columnIndex; + + Writer(int columnIndex) { + this.columnIndex = columnIndex; + } + + abstract boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException; + } + + private abstract class AbstractLongWriter extends Writer { + + AbstractLongWriter(int columnIndex) { + super(columnIndex); + } + } + + private class BooleanWriter extends AbstractLongWriter { + + BooleanWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeBoolean(colVector.vector[0] != 0); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeBoolean(colVector.vector[batchIndex] != 0); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class ByteWriter extends AbstractLongWriter { + + ByteWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeByte((byte) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeByte((byte) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class ShortWriter extends AbstractLongWriter { + + ShortWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeShort((short) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeShort((short) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class IntWriter extends AbstractLongWriter { + + IntWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeInt((int) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeInt((int) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class LongWriter extends AbstractLongWriter { + + LongWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = 
(LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeLong(colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeLong(colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class DateWriter extends AbstractLongWriter { + + DateWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeDate((int) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeDate((int) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class TimestampWriter extends AbstractLongWriter { + + Timestamp scratchTimestamp; + + TimestampWriter(int columnIndex) { + super(columnIndex); + scratchTimestamp = new Timestamp(0); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + TimestampUtils.assignTimeInNanoSec(colVector.vector[0], scratchTimestamp); + serializeWrite.writeTimestamp(scratchTimestamp); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + TimestampUtils.assignTimeInNanoSec(colVector.vector[batchIndex], scratchTimestamp); + serializeWrite.writeTimestamp(scratchTimestamp); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class IntervalYearMonthWriter extends AbstractLongWriter { + + IntervalYearMonthWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeHiveIntervalYearMonth((int) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeHiveIntervalYearMonth((int) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class IntervalDayTimeWriter extends AbstractLongWriter { + + IntervalDayTimeWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeHiveIntervalDayTime(colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + 
serializeWrite.writeHiveIntervalDayTime(colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private abstract class AbstractDoubleWriter extends Writer { + + AbstractDoubleWriter(int columnIndex) { + super(columnIndex); + } + } + + private class FloatWriter extends AbstractDoubleWriter { + + FloatWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeFloat((float) colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeFloat((float) colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class DoubleWriter extends AbstractDoubleWriter { + + DoubleWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeDouble(colVector.vector[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeDouble(colVector.vector[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class StringWriter extends Writer { + + StringWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeString(colVector.vector[0], colVector.start[0], colVector.length[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeString(colVector.vector[batchIndex], + colVector.start[batchIndex], colVector.length[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class BinaryWriter extends Writer { + + BinaryWriter(int columnIndex) { + super(columnIndex); + } + + @Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeBinary(colVector.vector[0], colVector.start[0], colVector.length[0]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeBinary(colVector.vector[batchIndex], + colVector.start[batchIndex], colVector.length[batchIndex]); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private class HiveDecimalWriter extends Writer { + protected HiveDecimalWritable[] vector; + + HiveDecimalWriter(int columnIndex) { + super(columnIndex); + } + + 
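+ // Like the writers above, apply() below reads index 0 when the vector is repeating (the
+ // single value stands for the whole batch), then checks noNulls/isNull before writing; a
+ // null entry is written through serializeWrite.writeNull() and reported by returning false.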
@Override + boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DecimalColumnVector colVector = (DecimalColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + if (colVector.noNulls || !colVector.isNull[0]) { + serializeWrite.writeHiveDecimal(colVector.vector[0].getHiveDecimal()); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } else { + if (colVector.noNulls || !colVector.isNull[batchIndex]) { + serializeWrite.writeHiveDecimal(colVector.vector[batchIndex].getHiveDecimal()); + return true; + } else { + serializeWrite.writeNull(); + return false; + } + } + } + } + + private Writer[] writers; + + private Writer createWriter(TypeInfo typeInfo, int columnIndex) throws HiveException { + Writer writer; + Category category = typeInfo.getCategory(); + switch (category) { + case PRIMITIVE: + { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + switch (primitiveCategory) { + // case VOID: + // UNDONE: + // break; + case BOOLEAN: + writer = new BooleanWriter(columnIndex); + break; + case BYTE: + writer = new ByteWriter(columnIndex); + break; + case SHORT: + writer = new ShortWriter(columnIndex); + break; + case INT: + writer = new IntWriter(columnIndex); + break; + case LONG: + writer = new LongWriter(columnIndex); + break; + case DATE: + writer = new DateWriter(columnIndex); + break; + case TIMESTAMP: + writer = new TimestampWriter(columnIndex); + break; + case FLOAT: + writer = new FloatWriter(columnIndex); + break; + case DOUBLE: + writer = new DoubleWriter(columnIndex); + break; + case STRING: + case CHAR: + case VARCHAR: + // We store CHAR and VARCHAR without pads, so use STRING writer class. + writer = new StringWriter(columnIndex); + break; + case BINARY: + writer = new BinaryWriter(columnIndex); + break; + case DECIMAL: + writer = new HiveDecimalWriter(columnIndex); + break; + case INTERVAL_YEAR_MONTH: + writer = new IntervalYearMonthWriter(columnIndex); + break; + case INTERVAL_DAY_TIME: + writer = new IntervalDayTimeWriter(columnIndex); + break; + default: + throw new HiveException("Unexpected primitive type category " + primitiveCategory); + } + } + break; + default: + throw new HiveException("Unexpected type category " + category); + } + return writer; + } + + public void init(List typeNames, int[] columnMap) throws HiveException { + writers = new Writer[typeNames.size()]; + for (int i = 0; i < typeNames.size(); i++) { + String typeName = typeNames.get(i); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + int columnIndex = columnMap[i]; + Writer writer = createWriter(typeInfo, columnIndex); + writers[i] = writer; + } + } + + public void init(List typeNames) throws HiveException { + writers = new Writer[typeNames.size()]; + for (int i = 0; i < typeNames.size(); i++) { + String typeName = typeNames.get(i); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + Writer writer = createWriter(typeInfo, i); + writers[i] = writer; + } + } + + public int getCount() { + return writers.length; + } + + public void setOutput(Output output) { + serializeWrite.set(output); + } + + /* + * Note that when serializing a row, the logical mapping using selected in use has already + * been performed. batchIndex is the actual index of the row. 
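+ *
+ * A possible caller, sketched for illustration: mySerializeWrite (some SerializeWrite
+ * implementation instance), typeNames, batch and batchIndex are assumed to exist elsewhere,
+ * and the Output construction shown is also assumed; the constructor, init, setOutput and
+ * serializeWrite calls are the members defined in this class.
+ *
+ *   VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(mySerializeWrite);
+ *   vectorSerializeRow.init(typeNames);         // Hive type names of the columns to write
+ *   Output output = new Output();               // serde2 ByteStream.Output buffer
+ *   vectorSerializeRow.setOutput(output);       // receives the serialized bytes
+ *   boolean hadNulls = vectorSerializeRow.serializeWrite(batch, batchIndex);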
+ */ + public boolean serializeWrite(VectorizedRowBatch batch, int batchIndex) throws IOException { + boolean anyNulls = false; + for (Writer writer : writers) { + if (!writer.apply(batch, batchIndex)) { + anyNulls = true; + } + } + return anyNulls; + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRowNoNulls.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRowNoNulls.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRowNoNulls.java (working copy) @@ -0,0 +1,395 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.IOException; +import java.sql.Timestamp; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +/** + * This class serializes columns from a row in a VectorizedRowBatch into a serialization format. + * + * The caller provides the hive type names and column numbers in the order desired to + * serialize. + * + * This class uses a provided SerializeWrite object to directly serialize by writing + * field-by-field into a serialization format from the primitive values of the VectorizedRowBatch. + * + * Note that when serializing a row, the logical mapping through the selected array (when + * selectedInUse is true) has already been applied. + * + * NOTE: This class is a variation of VectorSerializeRow for serialization of columns that + * have no nulls. + */ +public class VectorSerializeRowNoNulls { + private static final Log LOG = LogFactory.getLog(VectorSerializeRowNoNulls.class.getName()); + + private SerializeWrite serializeWrite; + + public VectorSerializeRowNoNulls(SerializeWrite serializeWrite) { + this(); + this.serializeWrite = serializeWrite; + } + + // Not public since we must have the serialize write object.
+ private VectorSerializeRowNoNulls() { + } + + private abstract class Writer { + protected int columnIndex; + + Writer(int columnIndex) { + this.columnIndex = columnIndex; + } + + abstract void apply(VectorizedRowBatch batch, int batchIndex) throws IOException; + } + + private abstract class AbstractLongWriter extends Writer { + + AbstractLongWriter(int columnIndex) { + super(columnIndex); + } + } + + private class BooleanWriter extends AbstractLongWriter { + + BooleanWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeBoolean(colVector.vector[colVector.isRepeating ? 0 : batchIndex] != 0); + } + } + + private class ByteWriter extends AbstractLongWriter { + + ByteWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeByte((byte) colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class ShortWriter extends AbstractLongWriter { + + ShortWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeShort((short) colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class IntWriter extends AbstractLongWriter { + + IntWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeInt((int) colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class LongWriter extends AbstractLongWriter { + + LongWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeLong(colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class DateWriter extends AbstractLongWriter { + + DateWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeDate((int) colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class TimestampWriter extends AbstractLongWriter { + + Timestamp scratchTimestamp; + + TimestampWriter(int columnIndex) { + super(columnIndex); + scratchTimestamp = new Timestamp(0); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + TimestampUtils.assignTimeInNanoSec(colVector.vector[colVector.isRepeating ? 
0 : batchIndex], scratchTimestamp); + serializeWrite.writeTimestamp(scratchTimestamp); + } + } + + private class IntervalYearMonthWriter extends AbstractLongWriter { + + IntervalYearMonthWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeHiveIntervalYearMonth((int) colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class IntervalDayTimeWriter extends AbstractLongWriter { + + IntervalDayTimeWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + serializeWrite.writeHiveIntervalDayTime(colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private abstract class AbstractDoubleWriter extends Writer { + + AbstractDoubleWriter(int columnIndex) { + super(columnIndex); + } + } + + private class FloatWriter extends AbstractDoubleWriter { + + FloatWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + serializeWrite.writeFloat((float) colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class DoubleWriter extends AbstractDoubleWriter { + + DoubleWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex]; + serializeWrite.writeDouble(colVector.vector[colVector.isRepeating ? 0 : batchIndex]); + } + } + + private class StringWriter extends Writer { + + StringWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + serializeWrite.writeString(colVector.vector[0], colVector.start[0], colVector.length[0]); + } else { + serializeWrite.writeString(colVector.vector[batchIndex], colVector.start[batchIndex], colVector.length[batchIndex]); + } + } + } + + private class BinaryWriter extends Writer { + + BinaryWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex]; + + if (colVector.isRepeating) { + serializeWrite.writeBinary(colVector.vector[0], colVector.start[0], colVector.length[0]); + } else { + serializeWrite.writeBinary(colVector.vector[batchIndex], colVector.start[batchIndex], colVector.length[batchIndex]); + } + } + } + + private class HiveDecimalWriter extends Writer { + + HiveDecimalWriter(int columnIndex) { + super(columnIndex); + } + + @Override + void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { + DecimalColumnVector colVector = (DecimalColumnVector) batch.cols[columnIndex]; + serializeWrite.writeHiveDecimal(colVector.vector[colVector.isRepeating ? 
0 : batchIndex].getHiveDecimal()); + } + } + + private Writer[] writers; + + private Writer createWriter(TypeInfo typeInfo, int columnIndex) throws HiveException { + Writer writer; + Category category = typeInfo.getCategory(); + switch (category) { + case PRIMITIVE: + { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + switch (primitiveCategory) { + // case VOID: + // UNDONE: + // break; + case BOOLEAN: + writer = new BooleanWriter(columnIndex); + break; + case BYTE: + writer = new ByteWriter(columnIndex); + break; + case SHORT: + writer = new ShortWriter(columnIndex); + break; + case INT: + writer = new IntWriter(columnIndex); + break; + case LONG: + writer = new LongWriter(columnIndex); + break; + case DATE: + writer = new DateWriter(columnIndex); + break; + case TIMESTAMP: + writer = new TimestampWriter(columnIndex); + break; + case FLOAT: + writer = new FloatWriter(columnIndex); + break; + case DOUBLE: + writer = new DoubleWriter(columnIndex); + break; + case STRING: + case CHAR: + case VARCHAR: + // We store CHAR and VARCHAR without pads, so use STRING writer class. + writer = new StringWriter(columnIndex); + break; + case BINARY: + writer = new BinaryWriter(columnIndex); + break; + case DECIMAL: + writer = new HiveDecimalWriter(columnIndex); + break; + case INTERVAL_YEAR_MONTH: + writer = new IntervalYearMonthWriter(columnIndex); + break; + case INTERVAL_DAY_TIME: + writer = new IntervalDayTimeWriter(columnIndex); + break; + default: + throw new HiveException("Unexpected primitive type category " + primitiveCategory); + } + } + break; + default: + throw new HiveException("Unexpected type category " + category); + } + return writer; + } + + public void init(List typeNames, int[] columnMap) throws HiveException { + writers = new Writer[typeNames.size()]; + for (int i = 0; i < typeNames.size(); i++) { + String typeName = typeNames.get(i); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + int columnIndex = columnMap[i]; + Writer writer = createWriter(typeInfo, columnIndex); + writers[i] = writer; + } + } + + public void init(List typeNames) throws HiveException { + writers = new Writer[typeNames.size()]; + for (int i = 0; i < typeNames.size(); i++) { + String typeName = typeNames.get(i); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + Writer writer = createWriter(typeInfo, i); + writers[i] = writer; + } + } + + public int getCount() { + return writers.length; + } + + public void setOutput(Output output) { + serializeWrite.set(output); + } + + /* + * Note that when serializing a row, the logical mapping using selected in use has already + * been performed. batchIndex is the actual index of the row. 
+ */ + public void serializeWriteNoNulls(VectorizedRowBatch batch, int batchIndex) throws IOException { + for (Writer writer : writers) { + writer.apply(batch, batchIndex); + } + } +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (working copy) @@ -31,8 +31,10 @@ import java.util.Map; import java.util.Set; import java.util.TreeMap; +import java.util.TreeSet; import java.util.regex.Pattern; +import org.apache.commons.lang.ArrayUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -128,6 +130,9 @@ private static final Log LOG = LogFactory.getLog( VectorizationContext.class.getName()); + private String contextName; + private int level; + VectorExpressionDescriptor vMap; private List projectedColumns; @@ -140,7 +145,10 @@ // Convenient constructor for initial batch creation takes // a list of columns names and maps them to 0..n-1 indices. - public VectorizationContext(List initialColumnNames) { + public VectorizationContext(String contextName, List initialColumnNames) { + this.contextName = contextName; + level = 0; + LOG.info("VectorizationContext constructor contextName " + contextName + " level " + level + " initialColumnNames " + initialColumnNames.toString()); this.projectionColumnNames = initialColumnNames; projectedColumns = new ArrayList(); @@ -157,8 +165,11 @@ // Constructor to with the individual addInitialColumn method // followed by a call to finishedAddingInitialColumns. - public VectorizationContext() { - projectedColumns = new ArrayList(); + public VectorizationContext(String contextName) { + this.contextName = contextName; + level = 0; + LOG.info("VectorizationContext constructor contextName " + contextName + " level " + level); + projectedColumns = new ArrayList(); projectionColumnNames = new ArrayList(); projectionColumnMap = new HashMap(); this.ocm = new OutputColumnManager(0); @@ -169,7 +180,10 @@ // Constructor useful making a projection vectorization context. // Use with resetProjectionColumns and addProjectionColumn. // Keeps existing output column map, etc. - public VectorizationContext(VectorizationContext vContext) { + public VectorizationContext(String contextName, VectorizationContext vContext) { + this.contextName = contextName; + level = vContext.level + 1; + LOG.info("VectorizationContext constructor reference contextName " + contextName + " level " + level); this.projectedColumns = new ArrayList(); this.projectionColumnNames = new ArrayList(); this.projectionColumnMap = new HashMap(); @@ -238,13 +252,6 @@ //Map column number to type private OutputColumnManager ocm; - // File key is used by operators to retrieve the scratch vectors - // from mapWork at runtime. The operators that modify the structure of - // a vector row batch, need to allocate scratch vectors as well. Every - // operator that creates a new Vectorization context should set a unique - // fileKey. - private String fileKey = null; - // Set of UDF classes for type casting data types in row-mode.
private static Set> castExpressionUdfs = new HashSet>(); static { @@ -268,14 +275,6 @@ castExpressionUdfs.add(UDFToShort.class); } - public String getFileKey() { - return fileKey; - } - - public void setFileKey(String fileKey) { - this.fileKey = fileKey; - } - protected int getInputColumnIndex(String name) throws HiveException { if (name == null) { throw new HiveException("Null column name"); @@ -316,6 +315,7 @@ // We need to differentiate DECIMAL columns by their precision and scale... String normalizedTypeName = getNormalizedName(hiveTypeName); int relativeCol = allocateOutputColumnInternal(normalizedTypeName); + // LOG.info("allocateOutputColumn for hiveTypeName " + hiveTypeName + " column " + (initialOutputCol + relativeCol)); return initialOutputCol + relativeCol; } @@ -357,8 +357,24 @@ usedOutputColumns.remove(index-initialOutputCol); } } + + public int[] currentScratchColumns() { + TreeSet treeSet = new TreeSet(); + for (Integer col : usedOutputColumns) { + treeSet.add(initialOutputCol + col); + } + return ArrayUtils.toPrimitive(treeSet.toArray(new Integer[0])); + } } + public int allocateScratchColumn(String hiveTypeName) { + return ocm.allocateOutputColumn(hiveTypeName); + } + + public int[] currentScratchColumns() { + return ocm.currentScratchColumns(); + } + private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, Mode mode) throws HiveException { int columnNum = getInputColumnIndex(exprDesc.getColumn()); @@ -2106,6 +2122,10 @@ "\" for type: \"" + inputType.name() + " (reduce-side = " + isReduce + ")"); } + public int firstOutputColumnIndex() { + return firstOutputColumnIndex; + } + public Map getScratchColumnTypeMap() { Map map = new HashMap(); for (int i = 0; i < ocm.outputColCount; i++) { @@ -2117,7 +2137,7 @@ public String toString() { StringBuilder sb = new StringBuilder(32); - sb.append("Context key ").append(getFileKey()).append(", "); + sb.append("Context name ").append(contextName).append(", level " + level + ", "); Comparator comparerInteger = new Comparator() { @Override @@ -2129,11 +2149,11 @@ for (Map.Entry entry : projectionColumnMap.entrySet()) { sortedColumnMap.put(entry.getValue(), entry.getKey()); } - sb.append("sortedProjectionColumnMap ").append(sortedColumnMap).append(", "); + sb.append("sorted projectionColumnMap ").append(sortedColumnMap).append(", "); Map sortedScratchColumnTypeMap = new TreeMap(comparerInteger); sortedScratchColumnTypeMap.putAll(getScratchColumnTypeMap()); - sb.append("sortedScratchColumnTypeMap ").append(sortedScratchColumnTypeMap); + sb.append("sorted scratchColumnTypeMap ").append(sortedScratchColumnTypeMap); return sb.toString(); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import java.sql.Timestamp; +import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -29,7 +30,6 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; import 
org.apache.hadoop.hive.serde2.io.DateWritable; @@ -43,10 +43,15 @@ import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.DataOutputBuffer; @@ -542,5 +547,98 @@ poi.getPrimitiveCategory()); } } + + public static StandardStructObjectInspector convertToStandardStructObjectInspector( + StructObjectInspector structObjectInspector) throws HiveException { + + List fields = structObjectInspector.getAllStructFieldRefs(); + List oids = new ArrayList(); + ArrayList columnNames = new ArrayList(); + + for(StructField field : fields) { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString( + field.getFieldObjectInspector().getTypeName()); + ObjectInspector standardWritableObjectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo); + oids.add(standardWritableObjectInspector); + columnNames.add(field.getFieldName()); + } + return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames,oids); + } + + public static PrimitiveTypeInfo[] primitiveTypeInfosFromStructObjectInspector( + StructObjectInspector structObjectInspector) throws HiveException { + + List fields = structObjectInspector.getAllStructFieldRefs(); + PrimitiveTypeInfo[] result = new PrimitiveTypeInfo[fields.size()]; + + int i = 0; + for(StructField field : fields) { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString( + field.getFieldObjectInspector().getTypeName()); + result[i++] = (PrimitiveTypeInfo) typeInfo; + } + return result; + } + + + public static String displayBytes(byte[] bytes, int start, int length) { + StringBuilder sb = new StringBuilder(); + for (int i = start; i < start + length; i++) { + char ch = (char) bytes[i]; + if (ch < ' ' || ch > '~') { + sb.append(String.format("\\%03d", (int) (bytes[i] & 0xff))); + } else { + sb.append(ch); + } + } + return sb.toString(); + } + + public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) { + StringBuffer sb = new StringBuffer(); + sb.append(prefix + " row " + index + " "); + for (int i = 0; i < batch.projectionSize; i++) { + int column = batch.projectedColumns[i]; + ColumnVector colVector = batch.cols[column]; + if (colVector == null) { + sb.append("(null colVector " + column + ")"); + } else { + boolean isRepeating = colVector.isRepeating; + index = (isRepeating ? 
0 : index); + if (colVector.noNulls || !colVector.isNull[index]) { + if (colVector instanceof LongColumnVector) { + sb.append(((LongColumnVector) colVector).vector[index]); + } else if (colVector instanceof DoubleColumnVector) { + sb.append(((DoubleColumnVector) colVector).vector[index]); + } else if (colVector instanceof BytesColumnVector) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector; + byte[] bytes = bytesColumnVector.vector[index]; + int start = bytesColumnVector.start[index]; + int length = bytesColumnVector.length[index]; + if (bytes == null) { + sb.append("(Unexpected null bytes with start " + start + " length " + length + ")"); + } else { + sb.append(displayBytes(bytes, start, length)); + } + } else if (colVector instanceof DecimalColumnVector) { + sb.append(((DecimalColumnVector) colVector).vector[index].toString()); + } else { + sb.append("Unknown"); + } + } else { + sb.append("NULL"); + } + } + sb.append(" "); + } + System.out.println(sb.toString()); + } + + public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) throws HiveException { + for (int i = 0; i < batch.size; i++) { + int index = (batch.selectedInUse ? batch.selected[i] : i); + debugDisplayOneRow(batch, index, prefix); + } + } } - Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java (working copy) @@ -190,6 +190,7 @@ * - sets size to 0 * - sets endOfFile to false * - resets each column + * - inits each column */ public void reset() { selectedInUse = false; @@ -198,6 +199,7 @@ for (ColumnVector vc : cols) { if (vc != null) { vc.reset(); + vc.init(); } } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (working copy) @@ -42,7 +42,6 @@ import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.IOPrepareCache; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; @@ -174,9 +173,8 @@ split.getPath(), IOPrepareCache.get().getPartitionDescMap()); String partitionPath = split.getPath().getParent().toString(); - scratchColumnTypeMap = Utilities - .getMapWorkAllScratchColumnVectorTypeMaps(hiveConf) - .get(partitionPath); + scratchColumnTypeMap = Utilities.getMapWorkVectorScratchColumnTypeMap(hiveConf); + // LOG.info("VectorizedRowBatchCtx init scratchColumnTypeMap " + scratchColumnTypeMap.toString()); Properties partProps = (part.getPartSpec() == null || part.getPartSpec().isEmpty()) ? 
@@ -631,7 +629,7 @@ for (int i = origNumCols; i < newNumCols; i++) { String typeName = scratchColumnTypeMap.get(i); if (typeName == null) { - throw new HiveException("No type found for column type entry " + i); + throw new HiveException("No type entry found for column " + i + " in map " + scratchColumnTypeMap.toString()); } vrb.cols[i] = allocateColumnVector(typeName, VectorizedRowBatch.DEFAULT_SIZE); @@ -646,7 +644,7 @@ * @param decimalType The given decimal type string. * @return An integer array of size 2 with first element set to precision and second set to scale. */ - private int[] getScalePrecisionFromDecimalType(String decimalType) { + private static int[] getScalePrecisionFromDecimalType(String decimalType) { Pattern p = Pattern.compile("\\d+"); Matcher m = p.matcher(decimalType); m.find(); @@ -657,7 +655,7 @@ return precScale; } - private ColumnVector allocateColumnVector(String type, int defaultSize) { + public static ColumnVector allocateColumnVector(String type, int defaultSize) { if (type.equalsIgnoreCase("double")) { return new DoubleColumnVector(defaultSize); } else if (VectorizationContext.isStringFamily(type)) { @@ -675,18 +673,4 @@ throw new Error("Cannot allocate vector column for " + type); } } - - public VectorColumnAssign[] buildObjectAssigners(VectorizedRowBatch outputBatch) - throws HiveException { - List fieldRefs = rowOI.getAllStructFieldRefs(); - assert outputBatch.numCols == fieldRefs.size(); - VectorColumnAssign[] assigners = new VectorColumnAssign[fieldRefs.size()]; - for(int i = 0; i < assigners.length; ++i) { - StructField fieldRef = fieldRefs.get(i); - ObjectInspector fieldOI = fieldRef.getFieldObjectInspector(); - assigners[i] = VectorColumnAssignFactory.buildObjectAssign( - outputBatch, i, fieldOI); - } - return assigners; - } } Index: ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java (working copy) @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.mapred.InputFormat; import java.io.IOException; @@ -42,7 +43,7 @@ import java.util.LinkedHashMap; import java.util.List; -@Explain(displayName = "Merge File Operator") +@Explain(displayName = "Merge File Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class MergeFileWork extends MapWork { private static final Log LOG = LogFactory.getLog(MergeFileWork.class); Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java (working copy) @@ -21,16 +21,13 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.DiskRange; -import org.apache.hadoop.hive.common.DiskRangeList; import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk; -import 
org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim; import com.google.common.annotations.VisibleForTesting; Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (working copy) @@ -21,14 +21,16 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.TreeMap; +import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; +import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; @@ -38,15 +40,12 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.common.ValidTxnList; -import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; @@ -60,7 +59,6 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.ql.log.PerfLogger; -import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -72,13 +70,13 @@ import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.InvalidInputException; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.StringUtils; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; +import com.google.common.collect.Lists; import com.google.common.util.concurrent.ThreadFactoryBuilder; /** * A MapReduce/Hive input format for ORC files. @@ -107,7 +105,14 @@ InputFormatChecker, VectorizedInputFormatInterface, AcidInputFormat, CombineHiveInputFormat.AvoidSplitCombination { + static enum SplitStrategyKind{ + HYBRID, + BI, + ETL + } + private static final Log LOG = LogFactory.getLog(OrcInputFormat.class); + private static boolean isDebugEnabled = LOG.isDebugEnabled(); static final HadoopShims SHIMS = ShimLoader.getHadoopShims(); static final String MIN_SPLIT_SIZE = SHIMS.getHadoopConfNames().get("MAPREDMINSPLITSIZE"); @@ -265,7 +270,6 @@ } /** * Take the configuration and figure out which columns we need to include. 
- * @param options the options to update * @param types the types for the file * @param conf the configuration * @param isOriginal is the file in the original format? @@ -366,30 +370,28 @@ static class Context { private final Configuration conf; private static Cache footerCache; - private final ExecutorService threadPool; - private final List splits = - new ArrayList(10000); + private static ExecutorService threadPool = null; private final int numBuckets; - private final List errors = new ArrayList(); private final long maxSize; private final long minSize; private final boolean footerInSplits; private final boolean cacheStripeDetails; private final AtomicInteger cacheHitCounter = new AtomicInteger(0); private final AtomicInteger numFilesCounter = new AtomicInteger(0); - private Throwable fatalError = null; private ValidTxnList transactionList; + private SplitStrategyKind splitStrategyKind; - /** - * A count of the number of threads that may create more work for the - * thread pool. - */ - private int schedulers = 0; - Context(Configuration conf) { this.conf = conf; minSize = conf.getLong(MIN_SPLIT_SIZE, DEFAULT_MIN_SPLIT_SIZE); maxSize = conf.getLong(MAX_SPLIT_SIZE, DEFAULT_MAX_SPLIT_SIZE); + String ss = conf.get(ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname); + if (ss == null || ss.equals(SplitStrategyKind.HYBRID.name())) { + splitStrategyKind = SplitStrategyKind.HYBRID; + } else { + LOG.info("Enforcing " + ss + " ORC split strategy"); + splitStrategyKind = SplitStrategyKind.valueOf(ss); + } footerInSplits = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS); numBuckets = @@ -402,11 +404,13 @@ cacheStripeDetails = (cacheStripeDetailsSize > 0); - threadPool = Executors.newFixedThreadPool(numThreads, - new ThreadFactoryBuilder().setDaemon(true) - .setNameFormat("ORC_GET_SPLITS #%d").build()); + synchronized (Context.class) { + if (threadPool == null) { + threadPool = Executors.newFixedThreadPool(numThreads, + new ThreadFactoryBuilder().setDaemon(true) + .setNameFormat("ORC_GET_SPLITS #%d").build()); + } - synchronized (Context.class) { if (footerCache == null && cacheStripeDetails) { footerCache = CacheBuilder.newBuilder().concurrencyLevel(numThreads) .initialCapacity(cacheStripeDetailsSize).softValues().build(); @@ -416,87 +420,204 @@ Long.MAX_VALUE + ":"); transactionList = new ValidReadTxnList(value); } + } - int getSchedulers() { - return schedulers; - } + interface SplitStrategy { + List getSplits() throws IOException; + } - /** - * Get the Nth split. - * @param index if index >= 0, count from the front, otherwise count from - * the back. 
- * @return the Nth file split - */ - OrcSplit getResult(int index) { - if (index >= 0) { - return splits.get(index); - } else { - return splits.get(splits.size() + index); - } + static final class SplitInfo extends ACIDSplitStrategy { + private final Context context; + private final FileSystem fs; + private final FileStatus file; + private final FileInfo fileInfo; + private final boolean isOriginal; + private final List deltas; + private final boolean hasBase; + + SplitInfo(Context context, FileSystem fs, + FileStatus file, FileInfo fileInfo, + boolean isOriginal, + List deltas, + boolean hasBase, Path dir, boolean[] covered) throws IOException { + super(dir, context.numBuckets, deltas, covered); + this.context = context; + this.fs = fs; + this.file = file; + this.fileInfo = fileInfo; + this.isOriginal = isOriginal; + this.deltas = deltas; + this.hasBase = hasBase; } + } - List getErrors() { - return errors; + /** + * ETL strategy is used when spending little more time in split generation is acceptable + * (split generation reads and caches file footers). + */ + static final class ETLSplitStrategy implements SplitStrategy { + Context context; + FileSystem fs; + List files; + boolean isOriginal; + List deltas; + Path dir; + boolean[] covered; + + public ETLSplitStrategy(Context context, FileSystem fs, Path dir, List children, + boolean isOriginal, List deltas, boolean[] covered) { + this.context = context; + this.dir = dir; + this.fs = fs; + this.files = children; + this.isOriginal = isOriginal; + this.deltas = deltas; + this.covered = covered; } - /** - * Add a unit of work. - * @param runnable the object to run - */ - synchronized void schedule(Runnable runnable) { - if (fatalError == null) { - if (runnable instanceof FileGenerator || - runnable instanceof SplitGenerator) { - schedulers += 1; + private FileInfo verifyCachedFileInfo(FileStatus file) { + context.numFilesCounter.incrementAndGet(); + FileInfo fileInfo = Context.footerCache.getIfPresent(file.getPath()); + if (fileInfo != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("Info cached for path: " + file.getPath()); } - threadPool.execute(runnable); + if (fileInfo.modificationTime == file.getModificationTime() && + fileInfo.size == file.getLen()) { + // Cached copy is valid + context.cacheHitCounter.incrementAndGet(); + return fileInfo; + } else { + // Invalidate + Context.footerCache.invalidate(file.getPath()); + if (LOG.isDebugEnabled()) { + LOG.debug("Meta-Info for : " + file.getPath() + + " changed. CachedModificationTime: " + + fileInfo.modificationTime + ", CurrentModificationTime: " + + file.getModificationTime() + + ", CachedLength: " + fileInfo.size + ", CurrentLength: " + + file.getLen()); + } + } } else { - throw new RuntimeException("serious problem", fatalError); + if (LOG.isDebugEnabled()) { + LOG.debug("Info not cached for path: " + file.getPath()); + } } + return null; } - /** - * Mark a worker that may generate more work as done. 
- */ - synchronized void decrementSchedulers() { - schedulers -= 1; - if (schedulers == 0) { - notify(); + @Override + public List getSplits() throws IOException { + List result = Lists.newArrayList(); + for (FileStatus file : files) { + FileInfo info = null; + if (context.cacheStripeDetails) { + info = verifyCachedFileInfo(file); + } + // ignore files of 0 length + if (file.getLen() > 0) { + result.add(new SplitInfo(context, fs, file, info, isOriginal, deltas, true, dir, covered)); + } } + return result; } - synchronized void notifyOnNonIOException(Throwable th) { - fatalError = th; - notify(); + @Override + public String toString() { + return ETLSplitStrategy.class.getSimpleName() + " strategy for " + dir; } + } - /** - * Wait until all of the tasks are done. It waits until all of the - * threads that may create more work are done and then shuts down the - * thread pool and waits for the final threads to finish. - */ - synchronized void waitForTasks() { - try { - while (schedulers != 0) { - wait(); - if (fatalError != null) { - threadPool.shutdownNow(); - throw new RuntimeException("serious problem", fatalError); + /** + * BI strategy is used when the requirement is to spend less time in split generation + * as opposed to query execution (split generation does not read or cache file footers). + */ + static final class BISplitStrategy extends ACIDSplitStrategy { + List fileStatuses; + boolean isOriginal; + List deltas; + FileSystem fs; + Context context; + Path dir; + + public BISplitStrategy(Context context, FileSystem fs, + Path dir, List fileStatuses, boolean isOriginal, + List deltas, boolean[] covered) { + super(dir, context.numBuckets, deltas, covered); + this.context = context; + this.fileStatuses = fileStatuses; + this.isOriginal = isOriginal; + this.deltas = deltas; + this.fs = fs; + this.dir = dir; + } + + @Override + public List getSplits() throws IOException { + List splits = Lists.newArrayList(); + for (FileStatus fileStatus : fileStatuses) { + String[] hosts = SHIMS.getLocationsWithOffset(fs, fileStatus).firstEntry().getValue() + .getHosts(); + OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), 0, fileStatus.getLen(), hosts, + null, isOriginal, true, deltas, -1); + splits.add(orcSplit); + } + + // add uncovered ACID delta splits + splits.addAll(super.getSplits()); + return splits; + } + + @Override + public String toString() { + return BISplitStrategy.class.getSimpleName() + " strategy for " + dir; + } + } + + /** + * ACID split strategy is used when there is no base directory (when transactions are enabled). + */ + static class ACIDSplitStrategy implements SplitStrategy { + Path dir; + List deltas; + boolean[] covered; + int numBuckets; + + public ACIDSplitStrategy(Path dir, int numBuckets, List deltas, boolean[] covered) { + this.dir = dir; + this.numBuckets = numBuckets; + this.deltas = deltas; + this.covered = covered; + } + + @Override + public List getSplits() throws IOException { + // Generate a split for any buckets that weren't covered. + // This happens in the case where a bucket just has deltas and no + // base. 
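// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It condenses the
// split-strategy decomposition introduced in this hunk: a common SplitStrategy
// interface, a cheap BI-style implementation (one split per file, no footer
// reads), an ETL-style implementation (per-file work items to expand later),
// and a HYBRID chooser keyed off average file size, as FileGenerator.call()
// does below. All class names and the FileMeta type are placeholders.
// ---------------------------------------------------------------------------
import java.util.ArrayList;
import java.util.List;

public class SplitStrategySketch {
  static class FileMeta {
    final String path;
    final long length;
    FileMeta(String path, long length) { this.path = path; this.length = length; }
  }

  interface SplitStrategy<T> { List<T> getSplits(); }

  // BI: spend as little time as possible in split generation.
  static class BiStrategy implements SplitStrategy<String> {
    final List<FileMeta> files;
    BiStrategy(List<FileMeta> files) { this.files = files; }
    public List<String> getSplits() {
      List<String> splits = new ArrayList<>();
      for (FileMeta f : files) splits.add(f.path + ":0+" + f.length);  // one split per file
      return splits;
    }
  }

  // ETL: return per-file work items that a thread pool can expand into
  // stripe-aligned splits after reading footers.
  static class EtlStrategy implements SplitStrategy<FileMeta> {
    final List<FileMeta> files;
    EtlStrategy(List<FileMeta> files) { this.files = files; }
    public List<FileMeta> getSplits() { return new ArrayList<>(files); }
  }

  static SplitStrategy<?> choose(List<FileMeta> files, long maxSplitSize) {
    long total = 0;
    for (FileMeta f : files) total += f.length;
    long avg = files.isEmpty() ? 0 : total / files.size();
    // Large files benefit from stripe-level splitting (ETL); directories of
    // small files are cheaper to list as-is (BI).
    return avg > maxSplitSize ? new EtlStrategy(files) : new BiStrategy(files);
  }
}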
+ List splits = Lists.newArrayList(); + if (!deltas.isEmpty()) { + for (int b = 0; b < numBuckets; ++b) { + if (!covered[b]) { + splits.add(new OrcSplit(dir, b, 0, new String[0], null, false, false, deltas, -1)); } } - threadPool.shutdown(); - threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); - } catch (InterruptedException ie) { - throw new IllegalStateException("interrupted", ie); } + return splits; } + + @Override + public String toString() { + return ACIDSplitStrategy.class.getSimpleName() + " strategy for " + dir; + } } /** * Given a directory, get the list of files and blocks in those files. - * A thread is used for each directory. + * To parallelize file generator use "mapreduce.input.fileinputformat.list-status.num-threads" */ - static final class FileGenerator implements Runnable { + static final class FileGenerator implements Callable { private final Context context; private final FileSystem fs; private final Path dir; @@ -507,116 +628,70 @@ this.dir = dir; } - private void scheduleSplits(FileStatus file, - boolean isOriginal, - boolean hasBase, - List deltas) throws IOException{ - FileInfo info = null; - if (context.cacheStripeDetails) { - info = verifyCachedFileInfo(file); - } - new SplitGenerator(context, fs, file, info, isOriginal, deltas, - hasBase).schedule(); - } - - /** - * For each path, get the list of files and blocks that they consist of. - */ @Override - public void run() { - try { - AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir, - context.conf, context.transactionList); - List deltas = - AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories()); - Path base = dirInfo.getBaseDirectory(); - List original = dirInfo.getOriginalFiles(); + public SplitStrategy call() throws IOException { + final SplitStrategy splitStrategy; + AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir, + context.conf, context.transactionList); + List deltas = AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories()); + Path base = dirInfo.getBaseDirectory(); + List original = dirInfo.getOriginalFiles(); + boolean[] covered = new boolean[context.numBuckets]; + boolean isOriginal = base == null; - boolean[] covered = new boolean[context.numBuckets]; - boolean isOriginal = base == null; + // if we have a base to work from + if (base != null || !original.isEmpty()) { - // if we have a base to work from - if (base != null || !original.isEmpty()) { + // find the base files (original or new style) + List children = original; + if (base != null) { + children = SHIMS.listLocatedStatus(fs, base, + AcidUtils.hiddenFileFilter); + } - // find the base files (original or new style) - List children = original; - if (base != null) { - children = SHIMS.listLocatedStatus(fs, base, - AcidUtils.hiddenFileFilter); + long totalFileSize = 0; + for (FileStatus child : children) { + totalFileSize += child.getLen(); + AcidOutputFormat.Options opts = AcidUtils.parseBaseBucketFilename + (child.getPath(), context.conf); + int b = opts.getBucket(); + // If the bucket is in the valid range, mark it as covered. + // I wish Hive actually enforced bucketing all of the time. + if (b >= 0 && b < covered.length) { + covered[b] = true; } - - // for each child, schedule splits and mark off the bucket - for(FileStatus child: children) { - AcidOutputFormat.Options opts = AcidUtils.parseBaseBucketFilename - (child.getPath(), context.conf); - scheduleSplits(child, isOriginal, true, deltas); - int b = opts.getBucket(); - // If the bucket is in the valid range, mark it as covered. 
- // I wish Hive actually enforced bucketing all of the time. - if (b >= 0 && b < covered.length) { - covered[b] = true; - } - } } - // Generate a split for any buckets that weren't covered. - // This happens in the case where a bucket just has deltas and no - // base. - if (!deltas.isEmpty()) { - for (int b = 0; b < context.numBuckets; ++b) { - if (!covered[b]) { - synchronized (context.splits) { - context.splits.add(new OrcSplit(dir, b, 0, new String[0], null, - false, false, deltas)); - } + int numFiles = children.size(); + long avgFileSize = totalFileSize / numFiles; + switch(context.splitStrategyKind) { + case BI: + // BI strategy requested through config + splitStrategy = new BISplitStrategy(context, fs, dir, children, isOriginal, + deltas, covered); + break; + case ETL: + // ETL strategy requested through config + splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, + deltas, covered); + break; + default: + // HYBRID strategy + if (avgFileSize > context.maxSize) { + splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas, + covered); + } else { + splitStrategy = new BISplitStrategy(context, fs, dir, children, isOriginal, deltas, + covered); } - } + break; } - } catch (Throwable th) { - if (!(th instanceof IOException)) { - LOG.error("Unexpected Exception", th); - } - synchronized (context.errors) { - context.errors.add(th); - } - if (!(th instanceof IOException)) { - context.notifyOnNonIOException(th); - } - } finally { - context.decrementSchedulers(); + } else { + // no base, only deltas + splitStrategy = new ACIDSplitStrategy(dir, context.numBuckets, deltas, covered); } - } - private FileInfo verifyCachedFileInfo(FileStatus file) { - context.numFilesCounter.incrementAndGet(); - FileInfo fileInfo = Context.footerCache.getIfPresent(file.getPath()); - if (fileInfo != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Info cached for path: " + file.getPath()); - } - if (fileInfo.modificationTime == file.getModificationTime() && - fileInfo.size == file.getLen()) { - // Cached copy is valid - context.cacheHitCounter.incrementAndGet(); - return fileInfo; - } else { - // Invalidate - Context.footerCache.invalidate(file.getPath()); - if (LOG.isDebugEnabled()) { - LOG.debug("Meta-Info for : " + file.getPath() + - " changed. CachedModificationTime: " - + fileInfo.modificationTime + ", CurrentModificationTime: " - + file.getModificationTime() - + ", CachedLength: " + fileInfo.size + ", CurrentLength: " + - file.getLen()); - } - } - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Info not cached for path: " + file.getPath()); - } - } - return null; + return splitStrategy; } } @@ -624,7 +699,7 @@ * Split the stripes of a given file into input splits. * A thread is used for each file. 
*/ - static final class SplitGenerator implements Runnable { + static final class SplitGenerator implements Callable> { private final Context context; private final FileSystem fs; private final FileStatus file; @@ -639,40 +714,27 @@ private final List deltas; private final boolean hasBase; private OrcFile.WriterVersion writerVersion; + private long projColsUncompressedSize; + private List deltaSplits; - SplitGenerator(Context context, FileSystem fs, - FileStatus file, FileInfo fileInfo, - boolean isOriginal, - List deltas, - boolean hasBase) throws IOException { - this.context = context; - this.fs = fs; - this.file = file; + public SplitGenerator(SplitInfo splitInfo) throws IOException { + this.context = splitInfo.context; + this.fs = splitInfo.fs; + this.file = splitInfo.file; this.blockSize = file.getBlockSize(); - this.fileInfo = fileInfo; + this.fileInfo = splitInfo.fileInfo; locations = SHIMS.getLocationsWithOffset(fs, file); - this.isOriginal = isOriginal; - this.deltas = deltas; - this.hasBase = hasBase; + this.isOriginal = splitInfo.isOriginal; + this.deltas = splitInfo.deltas; + this.hasBase = splitInfo.hasBase; + this.projColsUncompressedSize = -1; + this.deltaSplits = splitInfo.getSplits(); } Path getPath() { return file.getPath(); } - void schedule() throws IOException { - if(locations.size() == 1 && file.getLen() < context.maxSize) { - String[] hosts = locations.firstEntry().getValue().getHosts(); - synchronized (context.splits) { - context.splits.add(new OrcSplit(file.getPath(), 0, file.getLen(), - hosts, fileMetaInfo, isOriginal, hasBase, deltas)); - } - } else { - // if it requires a compute task - context.schedule(this); - } - } - @Override public String toString() { return "splitter(" + file.getPath() + ")"; @@ -707,7 +769,7 @@ * @param fileMetaInfo file metadata from footer and postscript * @throws IOException */ - void createSplit(long offset, long length, + OrcSplit createSplit(long offset, long length, ReaderImpl.FileMetaInfo fileMetaInfo) throws IOException { String[] hosts; Map.Entry startEntry = locations.floorEntry(offset); @@ -761,10 +823,8 @@ hosts = new String[hostList.size()]; hostList.toArray(hosts); } - synchronized (context.splits) { - context.splits.add(new OrcSplit(file.getPath(), offset, length, - hosts, fileMetaInfo, isOriginal, hasBase, deltas)); - } + return new OrcSplit(file.getPath(), offset, length, hosts, fileMetaInfo, + isOriginal, hasBase, deltas, projColsUncompressedSize); } /** @@ -772,147 +832,125 @@ * block size and the configured minimum and maximum sizes. */ @Override - public void run() { - try { - populateAndCacheStripeDetails(); + public List call() throws IOException { + populateAndCacheStripeDetails(); + List splits = Lists.newArrayList(); - // figure out which stripes we need to read - boolean[] includeStripe = null; - // we can't eliminate stripes if there are deltas because the - // deltas may change the rows making them match the predicate. 
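// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It shows the
// threading model this hunk moves to: instead of Runnables that push results
// into shared synchronized state and track outstanding schedulers, each unit
// of work is a Callable whose result list is collected through Futures, and
// task failures surface as ExecutionExceptions at the collection point. All
// names are placeholders.
// ---------------------------------------------------------------------------
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class CallableCollectSketch {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(4);
    List<Future<List<String>>> futures = new ArrayList<>();
    for (int i = 0; i < 8; i++) {
      final int id = i;
      futures.add(pool.submit(new Callable<List<String>>() {
        @Override
        public List<String> call() {
          // each task returns its own list; no shared, synchronized collection
          List<String> part = new ArrayList<>();
          part.add("split-" + id);
          return part;
        }
      }));
    }
    List<String> splits = new ArrayList<>();
    try {
      for (Future<List<String>> f : futures) {
        splits.addAll(f.get());          // propagates any task failure here
      }
    } finally {
      pool.shutdown();
    }
    System.out.println(splits.size() + " splits");
  }
}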
- if (deltas.isEmpty()) { - Reader.Options options = new Reader.Options(); - options.include(genIncludedColumns(types, context.conf, isOriginal)); - setSearchArgument(options, types, context.conf, isOriginal); - // only do split pruning if HIVE-8732 has been fixed in the writer - if (options.getSearchArgument() != null && - writerVersion != OrcFile.WriterVersion.ORIGINAL) { - SearchArgument sarg = options.getSearchArgument(); - List sargLeaves = sarg.getLeaves(); - List stripeStats = metadata.getStripeStatistics(); - int[] filterColumns = RecordReaderImpl.mapSargColumns(sargLeaves, - options.getColumnNames(), getRootColumn(isOriginal)); + // figure out which stripes we need to read + boolean[] includeStripe = null; + // we can't eliminate stripes if there are deltas because the + // deltas may change the rows making them match the predicate. + if (deltas.isEmpty()) { + Reader.Options options = new Reader.Options(); + options.include(genIncludedColumns(types, context.conf, isOriginal)); + setSearchArgument(options, types, context.conf, isOriginal); + // only do split pruning if HIVE-8732 has been fixed in the writer + if (options.getSearchArgument() != null && + writerVersion != OrcFile.WriterVersion.ORIGINAL) { + SearchArgument sarg = options.getSearchArgument(); + List sargLeaves = sarg.getLeaves(); + List stripeStats = metadata.getStripeStatistics(); + int[] filterColumns = RecordReaderImpl.mapSargColumns(sargLeaves, + options.getColumnNames(), getRootColumn(isOriginal)); - if (stripeStats != null) { - // eliminate stripes that doesn't satisfy the predicate condition - includeStripe = new boolean[stripes.size()]; - for(int i=0; i < stripes.size(); ++i) { - includeStripe[i] = (i >= stripeStats.size()) || - isStripeSatisfyPredicate(stripeStats.get(i), sarg, - filterColumns); - if (LOG.isDebugEnabled() && !includeStripe[i]) { - LOG.debug("Eliminating ORC stripe-" + i + " of file '" + - file.getPath() + "' as it did not satisfy " + - "predicate condition."); - } + if (stripeStats != null) { + // eliminate stripes that doesn't satisfy the predicate condition + includeStripe = new boolean[stripes.size()]; + for (int i = 0; i < stripes.size(); ++i) { + includeStripe[i] = (i >= stripeStats.size()) || + isStripeSatisfyPredicate(stripeStats.get(i), sarg, + filterColumns); + if (LOG.isDebugEnabled() && !includeStripe[i]) { + LOG.debug("Eliminating ORC stripe-" + i + " of file '" + + file.getPath() + "' as it did not satisfy " + + "predicate condition."); } } } } + } - // if we didn't have predicate pushdown, read everything - if (includeStripe == null) { - includeStripe = new boolean[stripes.size()]; - Arrays.fill(includeStripe, true); - } + // if we didn't have predicate pushdown, read everything + if (includeStripe == null) { + includeStripe = new boolean[stripes.size()]; + Arrays.fill(includeStripe, true); + } - long currentOffset = -1; - long currentLength = 0; - int idx = -1; - for(StripeInformation stripe: stripes) { - idx++; + long currentOffset = -1; + long currentLength = 0; + int idx = -1; + for (StripeInformation stripe : stripes) { + idx++; - if (!includeStripe[idx]) { - // create split for the previous unfinished stripe - if (currentOffset != -1) { - createSplit(currentOffset, currentLength, fileMetaInfo); - currentOffset = -1; - } - continue; - } - - // if we are working on a stripe, over the min stripe size, and - // crossed a block boundary, cut the input split here. 
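// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It isolates the
// stripe-packing rules used in call(): stripes are coalesced into one split
// until either the accumulated length reaches the configured maximum, or the
// split would straddle an HDFS block boundary while already exceeding the
// minimum size. The long[]{offset, length} encoding is a placeholder for the
// real StripeInformation/OrcSplit types.
// ---------------------------------------------------------------------------
import java.util.ArrayList;
import java.util.List;

public class StripePackingSketch {
  static List<long[]> pack(long[][] stripes, long blockSize, long minSize, long maxSize) {
    List<long[]> splits = new ArrayList<>();   // each entry is {offset, length}
    long currentOffset = -1;
    long currentLength = 0;
    for (long[] s : stripes) {                 // s = {offset, length}
      if (currentOffset != -1 && currentLength > minSize
          && currentOffset / blockSize != s[0] / blockSize) {
        splits.add(new long[]{currentOffset, currentLength});  // cut at block boundary
        currentOffset = -1;
      }
      if (currentOffset == -1) {               // start a new split
        currentOffset = s[0];
        currentLength = s[1];
      } else {                                 // extend the current split
        currentLength = (s[0] + s[1]) - currentOffset;
      }
      if (currentLength >= maxSize) {          // cut once the split is big enough
        splits.add(new long[]{currentOffset, currentLength});
        currentOffset = -1;
      }
    }
    if (currentOffset != -1) {
      splits.add(new long[]{currentOffset, currentLength});    // last unfinished split
    }
    return splits;
  }
}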
- if (currentOffset != -1 && currentLength > context.minSize && - (currentOffset / blockSize != stripe.getOffset() / blockSize)) { - createSplit(currentOffset, currentLength, fileMetaInfo); + if (!includeStripe[idx]) { + // create split for the previous unfinished stripe + if (currentOffset != -1) { + splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); currentOffset = -1; } - // if we aren't building a split, start a new one. - if (currentOffset == -1) { - currentOffset = stripe.getOffset(); - currentLength = stripe.getLength(); - } else { - currentLength = - (stripe.getOffset() + stripe.getLength()) - currentOffset; - } - if (currentLength >= context.maxSize) { - createSplit(currentOffset, currentLength, fileMetaInfo); - currentOffset = -1; - } + continue; } - if (currentOffset != -1) { - createSplit(currentOffset, currentLength, fileMetaInfo); + + // if we are working on a stripe, over the min stripe size, and + // crossed a block boundary, cut the input split here. + if (currentOffset != -1 && currentLength > context.minSize && + (currentOffset / blockSize != stripe.getOffset() / blockSize)) { + splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); + currentOffset = -1; } - } catch (Throwable th) { - if (!(th instanceof IOException)) { - LOG.error("Unexpected Exception", th); + // if we aren't building a split, start a new one. + if (currentOffset == -1) { + currentOffset = stripe.getOffset(); + currentLength = stripe.getLength(); + } else { + currentLength = + (stripe.getOffset() + stripe.getLength()) - currentOffset; } - synchronized (context.errors) { - context.errors.add(th); + if (currentLength >= context.maxSize) { + splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); + currentOffset = -1; } - if (!(th instanceof IOException)) { - context.notifyOnNonIOException(th); - } - } finally { - context.decrementSchedulers(); } + if (currentOffset != -1) { + splits.add(createSplit(currentOffset, currentLength, fileMetaInfo)); + } + + // add uncovered ACID delta splits + splits.addAll(deltaSplits); + return splits; } - private void populateAndCacheStripeDetails() { - try { - Reader orcReader; - if (fileInfo != null) { - stripes = fileInfo.stripeInfos; - fileMetaInfo = fileInfo.fileMetaInfo; - metadata = fileInfo.metadata; - types = fileInfo.types; - writerVersion = fileInfo.writerVersion; - // For multiple runs, in case sendSplitsInFooter changes - if (fileMetaInfo == null && context.footerInSplits) { - orcReader = OrcFile.createReader(file.getPath(), - OrcFile.readerOptions(context.conf).filesystem(fs)); - fileInfo.fileMetaInfo = ((ReaderImpl) orcReader).getFileMetaInfo(); - fileInfo.metadata = orcReader.getMetadata(); - fileInfo.types = orcReader.getTypes(); - fileInfo.writerVersion = orcReader.getWriterVersion(); - } - } else { - orcReader = OrcFile.createReader(file.getPath(), - OrcFile.readerOptions(context.conf).filesystem(fs)); - stripes = orcReader.getStripes(); - metadata = orcReader.getMetadata(); - types = orcReader.getTypes(); - writerVersion = orcReader.getWriterVersion(); - fileMetaInfo = context.footerInSplits ? - ((ReaderImpl) orcReader).getFileMetaInfo() : null; - if (context.cacheStripeDetails) { - // Populate into cache. 
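// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It reproduces the
// footer-cache pattern used by verifyCachedFileInfo() and
// populateAndCacheStripeDetails(): a Guava soft-value cache keyed by path,
// where a cached entry is only trusted if the file's modification time and
// length still match, and is invalidated otherwise. FileInfoSketch stands in
// for the real FileInfo.
// ---------------------------------------------------------------------------
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

public class FooterCacheSketch {
  static class FileInfoSketch {
    final long modificationTime;
    final long size;
    FileInfoSketch(long modificationTime, long size) {
      this.modificationTime = modificationTime;
      this.size = size;
    }
  }

  private final Cache<String, FileInfoSketch> footerCache = CacheBuilder.newBuilder()
      .concurrencyLevel(8)
      .initialCapacity(1000)
      .softValues()          // let the GC drop cached footers under memory pressure
      .build();

  FileInfoSketch lookup(String path, long currentMtime, long currentLen) {
    FileInfoSketch cached = footerCache.getIfPresent(path);
    if (cached == null) {
      return null;                               // miss: caller reads the footer
    }
    if (cached.modificationTime == currentMtime && cached.size == currentLen) {
      return cached;                             // hit: metadata still matches
    }
    footerCache.invalidate(path);                // stale: file was rewritten
    return null;
  }

  void remember(String path, long mtime, long len) {
    footerCache.put(path, new FileInfoSketch(mtime, len));
  }
}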
- Context.footerCache.put(file.getPath(), - new FileInfo(file.getModificationTime(), file.getLen(), stripes, - metadata, types, fileMetaInfo, writerVersion)); - } + private void populateAndCacheStripeDetails() throws IOException { + Reader orcReader = OrcFile.createReader(file.getPath(), + OrcFile.readerOptions(context.conf).filesystem(fs)); + List projCols = ColumnProjectionUtils.getReadColumnNames(context.conf); + projColsUncompressedSize = orcReader.getRawDataSizeOfColumns(projCols); + if (fileInfo != null) { + stripes = fileInfo.stripeInfos; + fileMetaInfo = fileInfo.fileMetaInfo; + metadata = fileInfo.metadata; + types = fileInfo.types; + writerVersion = fileInfo.writerVersion; + // For multiple runs, in case sendSplitsInFooter changes + if (fileMetaInfo == null && context.footerInSplits) { + fileInfo.fileMetaInfo = ((ReaderImpl) orcReader).getFileMetaInfo(); + fileInfo.metadata = orcReader.getMetadata(); + fileInfo.types = orcReader.getTypes(); + fileInfo.writerVersion = orcReader.getWriterVersion(); } - } catch (Throwable th) { - if (!(th instanceof IOException)) { - LOG.error("Unexpected Exception", th); + } else { + stripes = orcReader.getStripes(); + metadata = orcReader.getMetadata(); + types = orcReader.getTypes(); + writerVersion = orcReader.getWriterVersion(); + fileMetaInfo = context.footerInSplits ? + ((ReaderImpl) orcReader).getFileMetaInfo() : null; + if (context.cacheStripeDetails) { + // Populate into cache. + Context.footerCache.put(file.getPath(), + new FileInfo(file.getModificationTime(), file.getLen(), stripes, + metadata, types, fileMetaInfo, writerVersion)); } - synchronized (context.errors) { - context.errors.add(th); - } - if (!(th instanceof IOException)) { - context.notifyOnNonIOException(th); - } } } @@ -943,31 +981,66 @@ throws IOException { // use threads to resolve directories into splits Context context = new Context(conf); - for(Path dir: getInputPaths(conf)) { + List splits = Lists.newArrayList(); + List> pathFutures = Lists.newArrayList(); + List> splitFutures = Lists.newArrayList(); + + // multi-threaded file statuses and split strategy + for (Path dir : getInputPaths(conf)) { FileSystem fs = dir.getFileSystem(conf); - context.schedule(new FileGenerator(context, fs, dir)); + FileGenerator fileGenerator = new FileGenerator(context, fs, dir); + pathFutures.add(context.threadPool.submit(fileGenerator)); } - context.waitForTasks(); - // deal with exceptions - if (!context.errors.isEmpty()) { - List errors = - new ArrayList(context.errors.size()); - for(Throwable th: context.errors) { - if (th instanceof IOException) { - errors.add((IOException) th); + + // complete path futures and schedule split generation + try { + for (Future pathFuture : pathFutures) { + SplitStrategy splitStrategy = (SplitStrategy) pathFuture.get(); + + if (isDebugEnabled) { + LOG.debug(splitStrategy); + } + + if (splitStrategy instanceof ETLSplitStrategy) { + List splitInfos = splitStrategy.getSplits(); + for (SplitInfo splitInfo : splitInfos) { + splitFutures.add(context.threadPool.submit(new SplitGenerator(splitInfo))); + } } else { - throw new RuntimeException("serious problem", th); + splits.addAll(splitStrategy.getSplits()); } } - throw new InvalidInputException(errors); + + // complete split futures + for (Future splitFuture : splitFutures) { + splits.addAll((Collection) splitFuture.get()); + } + } catch (Exception e) { + cancelFutures(pathFutures); + cancelFutures(splitFutures); + throw new RuntimeException("serious problem", e); } + if (context.cacheStripeDetails) { 
LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/" + context.numFilesCounter.get()); } - return context.splits; + + if (isDebugEnabled) { + for (OrcSplit split : splits) { + LOG.debug(split + " projected_columns_uncompressed_size: " + + split.getProjectedColumnsUncompressedSize()); + } + } + return splits; } + private static void cancelFutures(List> futures) { + for (Future future : futures) { + future.cancel(true); + } + } + @Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java (working copy) @@ -460,7 +460,7 @@ Path deltaFile = AcidUtils.createBucketFile(delta, bucket); FileSystem fs = deltaFile.getFileSystem(conf); long length = getLastFlushLength(fs, deltaFile); - if (fs.exists(deltaFile) && length != -1) { + if (length != -1 && fs.exists(deltaFile)) { Reader deltaReader = OrcFile.createReader(deltaFile, OrcFile.readerOptions(conf).maxLength(length)); ReaderPair deltaPair = new ReaderPair(key, deltaReader, bucket, minKey, Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java (working copy) @@ -43,6 +43,7 @@ private boolean hasBase; private final List deltas = new ArrayList(); private OrcFile.WriterVersion writerVersion; + private long projColsUncompressedSize; static final int BASE_FLAG = 4; static final int ORIGINAL_FLAG = 2; @@ -57,13 +58,14 @@ public OrcSplit(Path path, long offset, long length, String[] hosts, ReaderImpl.FileMetaInfo fileMetaInfo, boolean isOriginal, boolean hasBase, - List deltas) { + List deltas, long projectedDataSize) { super(path, offset, length, hosts); this.fileMetaInfo = fileMetaInfo; hasFooter = this.fileMetaInfo != null; this.isOriginal = isOriginal; this.hasBase = hasBase; this.deltas.addAll(deltas); + this.projColsUncompressedSize = projectedDataSize; } @Override @@ -149,4 +151,8 @@ public List getDeltas() { return deltas; } + + public long getProjectedColumnsUncompressedSize() { + return projColsUncompressedSize; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (working copy) @@ -19,22 +19,16 @@ import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_ZEROCOPY; -import java.io.EOFException; import java.io.IOException; import java.math.BigDecimal; -import java.math.BigInteger; import java.nio.ByteBuffer; import java.sql.Date; import java.sql.Timestamp; -import java.text.ParseException; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.TimeZone; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.exception.ExceptionUtils; @@ -49,35 +43,18 @@ import org.apache.hadoop.hive.common.DiskRangeList.DiskRangeListCreateHelper; import 
org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.io.filters.BloomFilter; import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils.ByteBufferAllocatorPool; +import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory.TreeReader; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; -import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim; -import org.apache.hadoop.io.BooleanWritable; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; class RecordReaderImpl implements RecordReader { @@ -224,7 +201,7 @@ firstRow = skippedRows; totalRowCount = rows; boolean skipCorrupt = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA); - reader = createTreeReader(0, types, included, skipCorrupt); + reader = TreeReaderFactory.createTreeReader(0, types, included, skipCorrupt); indexes = new OrcProto.RowIndex[types.size()]; bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()]; advanceToNextRow(reader, 0L, true); @@ -249,2263 +226,6 @@ } } - public abstract static class TreeReader { - protected final int columnId; - public BitFieldReader present = null; - protected boolean valuePresent = false; - - public TreeReader(int columnId) throws IOException { - this(columnId, null); - } - - public TreeReader(int columnId, InStream in) throws IOException { - this.columnId = columnId; - if (in == null) { - present = null; - valuePresent = true; - } else { - present = new BitFieldReader(in, 1); - } - } - - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) { - throw new IOException("Unknown encoding " + encoding + " in column " + - columnId); - } - } - - IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind, - InStream in, - boolean signed, boolean skipCorrupt) throws IOException { - switch (kind) { - case DIRECT_V2: - case DICTIONARY_V2: - return new RunLengthIntegerReaderV2(in, signed, skipCorrupt); - case DIRECT: - case DICTIONARY: - return new RunLengthIntegerReader(in, signed); - default: - throw new IllegalArgumentException("Unknown 
encoding " + kind); - } - } - - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - checkEncoding(stripeFooter.getColumnsList().get(columnId)); - InStream in = streams.get(new StreamName(columnId, - OrcProto.Stream.Kind.PRESENT)); - if (in == null) { - present = null; - valuePresent = true; - } else { - present = new BitFieldReader(in, 1); - } - } - - /** - * Seek to the given position. - * @param index the indexes loaded from the file - * @throws IOException - */ - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - public void seek(PositionProvider index) throws IOException { - if (present != null) { - present.seek(index); - } - } - - protected long countNonNulls(long rows) throws IOException { - if (present != null) { - long result = 0; - for(long c=0; c < rows; ++c) { - if (present.next() == 1) { - result += 1; - } - } - return result; - } else { - return rows; - } - } - - abstract void skipRows(long rows) throws IOException; - - Object next(Object previous) throws IOException { - if (present != null) { - valuePresent = present.next() == 1; - } - return previous; - } - /** - * Populates the isNull vector array in the previousVector object based on - * the present stream values. This function is called from all the child - * readers, and they all set the values based on isNull field value. - * @param previousVector The columnVector object whose isNull value is populated - * @param batchSize Size of the column vector - * @return - * @throws IOException - */ - public Object nextVector(Object previousVector, long batchSize) throws IOException { - ColumnVector result = (ColumnVector) previousVector; - if (present != null) { - // Set noNulls and isNull vector of the ColumnVector based on - // present stream - result.noNulls = true; - for (int i = 0; i < batchSize; i++) { - result.isNull[i] = (present.next() != 1); - if (result.noNulls && result.isNull[i]) { - result.noNulls = false; - } - } - } else { - // There is not present stream, this means that all the values are - // present. 
- result.noNulls = true; - for (int i = 0; i < batchSize; i++) { - result.isNull[i] = false; - } - } - return previousVector; - } - } - - public static class BooleanTreeReader extends TreeReader { - protected BitFieldReader reader = null; - - public BooleanTreeReader(int columnId) throws IOException { - this(columnId, null, null); - } - - public BooleanTreeReader(int columnId, InStream present, InStream data) throws IOException { - super(columnId, present); - if (data != null) { - reader = new BitFieldReader(data, 1); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - reader = new BitFieldReader(streams.get(new StreamName(columnId, - OrcProto.Stream.Kind.DATA)), 1); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - reader.seek(index); - } - - @Override - void skipRows(long items) throws IOException { - reader.skip(countNonNulls(items)); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - BooleanWritable result = null; - if (valuePresent) { - if (previous == null) { - result = new BooleanWritable(); - } else { - result = (BooleanWritable) previous; - } - result.set(reader.next() == 1); - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - LongColumnVector result = null; - if (previousVector == null) { - result = new LongColumnVector(); - } else { - result = (LongColumnVector) previousVector; - } - - // Read present/isNull stream - super.nextVector(result, batchSize); - - // Read value entries based on isNull entries - reader.nextVector(result, batchSize); - return result; - } - } - - public static class ByteTreeReader extends TreeReader{ - protected RunLengthByteReader reader = null; - - ByteTreeReader(int columnId) throws IOException { - this(columnId, null, null); - } - - public ByteTreeReader(int columnId, InStream present, InStream data) throws IOException { - super(columnId, present); - this.reader = new RunLengthByteReader(data); - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - reader = new RunLengthByteReader(streams.get(new StreamName(columnId, - OrcProto.Stream.Kind.DATA))); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - reader.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - ByteWritable result = null; - if (valuePresent) { - if (previous == null) { - result = new ByteWritable(); - } else { - result = (ByteWritable) previous; - } - result.set(reader.next()); - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - LongColumnVector result = null; - if (previousVector == null) { - result = new LongColumnVector(); - } else { - result = (LongColumnVector) previousVector; - } - - // Read present/isNull stream - super.nextVector(result, batchSize); - - // Read value entries based on isNull entries - reader.nextVector(result, batchSize); - return result; - } - - @Override - void skipRows(long items) throws 
IOException { - reader.skip(countNonNulls(items)); - } - } - - public static class ShortTreeReader extends TreeReader{ - protected IntegerReader reader = null; - - public ShortTreeReader(int columnId) throws IOException { - this(columnId, null, null, null); - } - - public ShortTreeReader(int columnId, InStream present, InStream data, - OrcProto.ColumnEncoding encoding) - throws IOException { - super(columnId, present); - if (data != null && encoding != null) { - checkEncoding(encoding); - this.reader = createIntegerReader(encoding.getKind(), data, true, false); - } - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && - (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { - throw new IOException("Unknown encoding " + encoding + " in column " + - columnId); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - StreamName name = new StreamName(columnId, - OrcProto.Stream.Kind.DATA); - reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(name), true, false); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - reader.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - ShortWritable result = null; - if (valuePresent) { - if (previous == null) { - result = new ShortWritable(); - } else { - result = (ShortWritable) previous; - } - result.set((short) reader.next()); - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - LongColumnVector result = null; - if (previousVector == null) { - result = new LongColumnVector(); - } else { - result = (LongColumnVector) previousVector; - } - - // Read present/isNull stream - super.nextVector(result, batchSize); - - // Read value entries based on isNull entries - reader.nextVector(result, batchSize); - return result; - } - - @Override - void skipRows(long items) throws IOException { - reader.skip(countNonNulls(items)); - } - } - - public static class IntTreeReader extends TreeReader{ - protected IntegerReader reader = null; - - public IntTreeReader(int columnId) throws IOException { - this(columnId, null, null, null); - } - - public IntTreeReader(int columnId, InStream present, InStream data, - OrcProto.ColumnEncoding encoding) - throws IOException { - super(columnId, present); - if (data != null && encoding != null) { - checkEncoding(encoding); - this.reader = createIntegerReader(encoding.getKind(), data, true, false); - } - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && - (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { - throw new IOException("Unknown encoding " + encoding + " in column " + - columnId); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - StreamName name = new StreamName(columnId, - OrcProto.Stream.Kind.DATA); - reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(name), true, false); - } - - @Override - void 
seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - reader.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - IntWritable result = null; - if (valuePresent) { - if (previous == null) { - result = new IntWritable(); - } else { - result = (IntWritable) previous; - } - result.set((int) reader.next()); - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - LongColumnVector result = null; - if (previousVector == null) { - result = new LongColumnVector(); - } else { - result = (LongColumnVector) previousVector; - } - - // Read present/isNull stream - super.nextVector(result, batchSize); - - // Read value entries based on isNull entries - reader.nextVector(result, batchSize); - return result; - } - - @Override - void skipRows(long items) throws IOException { - reader.skip(countNonNulls(items)); - } - } - - public static class LongTreeReader extends TreeReader{ - protected IntegerReader reader = null; - - LongTreeReader(int columnId, boolean skipCorrupt) throws IOException { - this(columnId, null, null, null, skipCorrupt); - } - - public LongTreeReader(int columnId, InStream present, InStream data, - OrcProto.ColumnEncoding encoding, - boolean skipCorrupt) - throws IOException { - super(columnId, present); - if (data != null && encoding != null) { - checkEncoding(encoding); - this.reader = createIntegerReader(encoding.getKind(), data, true, skipCorrupt); - } - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && - (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { - throw new IOException("Unknown encoding " + encoding + " in column " + - columnId); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - StreamName name = new StreamName(columnId, - OrcProto.Stream.Kind.DATA); - reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(name), true, false); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - reader.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - LongWritable result = null; - if (valuePresent) { - if (previous == null) { - result = new LongWritable(); - } else { - result = (LongWritable) previous; - } - result.set(reader.next()); - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - LongColumnVector result = null; - if (previousVector == null) { - result = new LongColumnVector(); - } else { - result = (LongColumnVector) previousVector; - } - - // Read present/isNull stream - super.nextVector(result, batchSize); - - // Read value entries based on isNull entries - reader.nextVector(result, batchSize); - return result; - } - - @Override - void skipRows(long items) throws IOException { - reader.skip(countNonNulls(items)); - } - } - - public static class FloatTreeReader extends TreeReader{ - protected InStream stream; - private final SerializationUtils utils; - - public 
FloatTreeReader(int columnId) throws IOException { - this(columnId, null, null); - } - - public FloatTreeReader(int columnId, InStream present, InStream data) throws IOException { - super(columnId, present); - this.utils = new SerializationUtils(); - this.stream = data; - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - StreamName name = new StreamName(columnId, - OrcProto.Stream.Kind.DATA); - stream = streams.get(name); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - stream.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - FloatWritable result = null; - if (valuePresent) { - if (previous == null) { - result = new FloatWritable(); - } else { - result = (FloatWritable) previous; - } - result.set(utils.readFloat(stream)); - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - DoubleColumnVector result = null; - if (previousVector == null) { - result = new DoubleColumnVector(); - } else { - result = (DoubleColumnVector) previousVector; - } - - // Read present/isNull stream - super.nextVector(result, batchSize); - - // Read value entries based on isNull entries - for (int i = 0; i < batchSize; i++) { - if (!result.isNull[i]) { - result.vector[i] = utils.readFloat(stream); - } else { - - // If the value is not present then set NaN - result.vector[i] = Double.NaN; - } - } - - // Set isRepeating flag - result.isRepeating = true; - for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) { - if (result.vector[i] != result.vector[i + 1]) { - result.isRepeating = false; - } - } - return result; - } - - @Override - protected void skipRows(long items) throws IOException { - items = countNonNulls(items); - for(int i=0; i < items; ++i) { - utils.readFloat(stream); - } - } - } - - public static class DoubleTreeReader extends TreeReader{ - protected InStream stream; - private final SerializationUtils utils; - - public DoubleTreeReader(int columnId) throws IOException { - this(columnId, null, null); - } - - public DoubleTreeReader(int columnId, InStream present, InStream data) throws IOException { - super(columnId, present); - this.utils = new SerializationUtils(); - this.stream = data; - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - StreamName name = - new StreamName(columnId, - OrcProto.Stream.Kind.DATA); - stream = streams.get(name); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - stream.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - DoubleWritable result = null; - if (valuePresent) { - if (previous == null) { - result = new DoubleWritable(); - } else { - result = (DoubleWritable) previous; - } - result.set(utils.readDouble(stream)); - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - DoubleColumnVector result = null; - if (previousVector == null) { - result = new DoubleColumnVector(); - } else { - result = 
(DoubleColumnVector) previousVector; - } - - // Read present/isNull stream - super.nextVector(result, batchSize); - - // Read value entries based on isNull entries - for (int i = 0; i < batchSize; i++) { - if (!result.isNull[i]) { - result.vector[i] = utils.readDouble(stream); - } else { - // If the value is not present then set NaN - result.vector[i] = Double.NaN; - } - } - - // Set isRepeating flag - result.isRepeating = true; - for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) { - if (result.vector[i] != result.vector[i + 1]) { - result.isRepeating = false; - } - } - return result; - } - - @Override - void skipRows(long items) throws IOException { - items = countNonNulls(items); - stream.skip(items * 8); - } - } - - public static class BinaryTreeReader extends TreeReader{ - protected InStream stream; - protected IntegerReader lengths = null; - - protected final LongColumnVector scratchlcv; - - BinaryTreeReader(int columnId) throws IOException { - this(columnId, null, null, null, null); - } - - public BinaryTreeReader(int columnId, InStream present, InStream data, InStream length, - OrcProto.ColumnEncoding encoding) throws IOException { - super(columnId, present); - scratchlcv = new LongColumnVector(); - this.stream = data; - if (length != null && encoding != null) { - checkEncoding(encoding); - this.lengths = createIntegerReader(encoding.getKind(), length, false, false); - } - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && - (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { - throw new IOException("Unknown encoding " + encoding + " in column " + - columnId); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - StreamName name = new StreamName(columnId, - OrcProto.Stream.Kind.DATA); - stream = streams.get(name); - lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - stream.seek(index); - lengths.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - BytesWritable result = null; - if (valuePresent) { - if (previous == null) { - result = new BytesWritable(); - } else { - result = (BytesWritable) previous; - } - int len = (int) lengths.next(); - result.setSize(len); - int offset = 0; - while (len > 0) { - int written = stream.read(result.getBytes(), offset, len); - if (written < 0) { - throw new EOFException("Can't finish byte read from " + stream); - } - len -= written; - offset += written; - } - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - BytesColumnVector result = null; - if (previousVector == null) { - result = new BytesColumnVector(); - } else { - result = (BytesColumnVector) previousVector; - } - - // Read present/isNull stream - super.nextVector(result, batchSize); - - BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); - return result; - } - - @Override - void skipRows(long items) throws IOException { - items = countNonNulls(items); - long 
lengthToSkip = 0; - for(int i=0; i < items; ++i) { - lengthToSkip += lengths.next(); - } - stream.skip(lengthToSkip); - } - } - - private static class TimestampTreeReader extends TreeReader{ - private IntegerReader data = null; - private IntegerReader nanos = null; - private final boolean skipCorrupt; - private Map baseTimestampMap; - private long base_timestamp; - private final TimeZone readerTimeZone; - private TimeZone writerTimeZone; - private boolean hasSameTZRules; - - TimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException { - this(columnId, null, null, null, null, skipCorrupt); - } - - public TimestampTreeReader(int columnId, InStream presentStream, InStream dataStream, - InStream nanosStream, OrcProto.ColumnEncoding encoding, boolean skipCorrupt) - throws IOException { - super(columnId, presentStream); - this.skipCorrupt = skipCorrupt; - this.baseTimestampMap = new HashMap<>(); - this.readerTimeZone = TimeZone.getDefault(); - this.writerTimeZone = readerTimeZone; - this.hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone); - this.base_timestamp = getBaseTimestamp(readerTimeZone.getID()); - if (encoding != null) { - checkEncoding(encoding); - - if (dataStream != null) { - this.data = createIntegerReader(encoding.getKind(), dataStream, true, skipCorrupt); - } - - if (nanosStream != null) { - this.nanos = createIntegerReader(encoding.getKind(), nanosStream, false, skipCorrupt); - } - } - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && - (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { - throw new IOException("Unknown encoding " + encoding + " in column " + - columnId); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - data = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(new StreamName(columnId, - OrcProto.Stream.Kind.DATA)), true, skipCorrupt); - nanos = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(new StreamName(columnId, - OrcProto.Stream.Kind.SECONDARY)), false, skipCorrupt); - base_timestamp = getBaseTimestamp(stripeFooter.getWriterTimezone()); - } - - private long getBaseTimestamp(String timeZoneId) throws IOException { - // to make sure new readers read old files in the same way - if (timeZoneId == null || timeZoneId.isEmpty()) { - timeZoneId = readerTimeZone.getID(); - } - - if (!baseTimestampMap.containsKey(timeZoneId)) { - writerTimeZone = TimeZone.getTimeZone(timeZoneId); - hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - sdf.setTimeZone(writerTimeZone); - try { - long epoch = - sdf.parse(WriterImpl.BASE_TIMESTAMP_STRING).getTime() / WriterImpl.MILLIS_PER_SECOND; - baseTimestampMap.put(timeZoneId, epoch); - return epoch; - } catch (ParseException e) { - throw new IOException("Unable to create base timestamp", e); - } finally { - sdf.setTimeZone(readerTimeZone); - } - } - - return baseTimestampMap.get(timeZoneId); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - data.seek(index); - nanos.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - 
super.next(previous); - TimestampWritable result = null; - if (valuePresent) { - if (previous == null) { - result = new TimestampWritable(); - } else { - result = (TimestampWritable) previous; - } - long millis = (data.next() + base_timestamp) * WriterImpl.MILLIS_PER_SECOND; - int newNanos = parseNanos(nanos.next()); - // fix the rounding when we divided by 1000. - if (millis >= 0) { - millis += newNanos / 1000000; - } else { - millis -= newNanos / 1000000; - } - long offset = 0; - // If reader and writer time zones have different rules, adjust the timezone difference - // between reader and writer taking day light savings into account. - if (!hasSameTZRules) { - offset = writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(millis); - } - long adjustedMillis = millis + offset; - Timestamp ts = new Timestamp(adjustedMillis); - // Sometimes the reader timezone might have changed after adding the adjustedMillis. - // To account for that change, check for any difference in reader timezone after - // adding adjustedMillis. If so use the new offset (offset at adjustedMillis point of time). - if (!hasSameTZRules && - (readerTimeZone.getOffset(millis) != readerTimeZone.getOffset(adjustedMillis))) { - long newOffset = - writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(adjustedMillis); - adjustedMillis = millis + newOffset; - ts.setTime(adjustedMillis); - } - ts.setNanos(newNanos); - result.set(ts); - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - LongColumnVector result = null; - if (previousVector == null) { - result = new LongColumnVector(); - } else { - result = (LongColumnVector) previousVector; - } - - result.reset(); - Object obj = null; - for (int i = 0; i < batchSize; i++) { - obj = next(obj); - if (obj == null) { - result.noNulls = false; - result.isNull[i] = true; - } else { - TimestampWritable writable = (TimestampWritable) obj; - Timestamp timestamp = writable.getTimestamp(); - result.vector[i] = TimestampUtils.getTimeNanoSec(timestamp); - } - } - - return result; - } - - private static int parseNanos(long serialized) { - int zeros = 7 & (int) serialized; - int result = (int) (serialized >>> 3); - if (zeros != 0) { - for(int i =0; i <= zeros; ++i) { - result *= 10; - } - } - return result; - } - - @Override - void skipRows(long items) throws IOException { - items = countNonNulls(items); - data.skip(items); - nanos.skip(items); - } - } - - public static class DateTreeReader extends TreeReader{ - protected IntegerReader reader = null; - - DateTreeReader(int columnId) throws IOException { - this(columnId, null, null, null); - } - - public DateTreeReader(int columnId, InStream present, InStream data, - OrcProto.ColumnEncoding encoding) throws IOException { - super(columnId, present); - if (data != null && encoding != null) { - checkEncoding(encoding); - reader = createIntegerReader(encoding.getKind(), data, true, false); - } - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && - (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { - throw new IOException("Unknown encoding " + encoding + " in column " + - columnId); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - StreamName name = new StreamName(columnId, - OrcProto.Stream.Kind.DATA); - reader = 
createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(name), true, false); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - reader.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - DateWritable result = null; - if (valuePresent) { - if (previous == null) { - result = new DateWritable(); - } else { - result = (DateWritable) previous; - } - result.set((int) reader.next()); - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - LongColumnVector result = null; - if (previousVector == null) { - result = new LongColumnVector(); - } else { - result = (LongColumnVector) previousVector; - } - - // Read present/isNull stream - super.nextVector(result, batchSize); - - // Read value entries based on isNull entries - reader.nextVector(result, batchSize); - return result; - } - - @Override - void skipRows(long items) throws IOException { - reader.skip(countNonNulls(items)); - } - } - - public static class DecimalTreeReader extends TreeReader{ - protected InStream valueStream; - protected IntegerReader scaleReader = null; - private LongColumnVector scratchScaleVector; - - private final int precision; - private final int scale; - - DecimalTreeReader(int columnId, int precision, int scale) throws IOException { - this(columnId, precision, scale, null, null, null, null); - } - - public DecimalTreeReader(int columnId, int precision, int scale, InStream present, - InStream valueStream, InStream scaleStream, OrcProto.ColumnEncoding encoding) - throws IOException { - super(columnId, present); - this.precision = precision; - this.scale = scale; - this.scratchScaleVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - this.valueStream = valueStream; - if (scaleStream != null && encoding != null) { - checkEncoding(encoding); - this.scaleReader = createIntegerReader(encoding.getKind(), scaleStream, true, false); - } - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && - (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { - throw new IOException("Unknown encoding " + encoding + " in column " + - columnId); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - valueStream = streams.get(new StreamName(columnId, - OrcProto.Stream.Kind.DATA)); - scaleReader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), true, false); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - valueStream.seek(index); - scaleReader.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - HiveDecimalWritable result = null; - if (valuePresent) { - if (previous == null) { - result = new HiveDecimalWritable(); - } else { - result = (HiveDecimalWritable) previous; - } - result.set(HiveDecimal.create(SerializationUtils.readBigInteger(valueStream), - (int) scaleReader.next())); - return 
HiveDecimalUtils.enforcePrecisionScale(result, precision, scale); - } - return null; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - DecimalColumnVector result = null; - if (previousVector == null) { - result = new DecimalColumnVector(precision, scale); - } else { - result = (DecimalColumnVector) previousVector; - } - - // Save the reference for isNull in the scratch vector - boolean [] scratchIsNull = scratchScaleVector.isNull; - - // Read present/isNull stream - super.nextVector(result, batchSize); - - // Read value entries based on isNull entries - if (result.isRepeating) { - if (!result.isNull[0]) { - BigInteger bInt = SerializationUtils.readBigInteger(valueStream); - short scaleInData = (short) scaleReader.next(); - HiveDecimal dec = HiveDecimal.create(bInt, scaleInData); - dec = HiveDecimalUtils.enforcePrecisionScale(dec, precision, scale); - result.set(0, dec); - } - } else { - // result vector has isNull values set, use the same to read scale vector. - scratchScaleVector.isNull = result.isNull; - scaleReader.nextVector(scratchScaleVector, batchSize); - for (int i = 0; i < batchSize; i++) { - if (!result.isNull[i]) { - BigInteger bInt = SerializationUtils.readBigInteger(valueStream); - short scaleInData = (short) scratchScaleVector.vector[i]; - HiveDecimal dec = HiveDecimal.create(bInt, scaleInData); - dec = HiveDecimalUtils.enforcePrecisionScale(dec, precision, scale); - result.set(i, dec); - } - } - } - // Switch back the null vector. - scratchScaleVector.isNull = scratchIsNull; - return result; - } - - @Override - void skipRows(long items) throws IOException { - items = countNonNulls(items); - for(int i=0; i < items; i++) { - SerializationUtils.readBigInteger(valueStream); - } - scaleReader.skip(items); - } - } - - /** - * A tree reader that will read string columns. At the start of the - * stripe, it creates an internal reader based on whether a direct or - * dictionary encoding was used. 
- */ - public static class StringTreeReader extends TreeReader { - protected TreeReader reader; - - public StringTreeReader(int columnId) throws IOException { - super(columnId); - } - - public StringTreeReader(int columnId, InStream present, InStream data, InStream length, - InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException { - super(columnId, present); - if (encoding != null) { - switch (encoding.getKind()) { - case DIRECT: - case DIRECT_V2: - reader = new StringDirectTreeReader(columnId, present, data, length, - encoding.getKind()); - break; - case DICTIONARY: - case DICTIONARY_V2: - reader = new StringDictionaryTreeReader(columnId, present, data, length, dictionary, - encoding); - break; - default: - throw new IllegalArgumentException("Unsupported encoding " + - encoding.getKind()); - } - } - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - reader.checkEncoding(encoding); - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - // For each stripe, checks the encoding and initializes the appropriate - // reader - switch (stripeFooter.getColumnsList().get(columnId).getKind()) { - case DIRECT: - case DIRECT_V2: - reader = new StringDirectTreeReader(columnId); - break; - case DICTIONARY: - case DICTIONARY_V2: - reader = new StringDictionaryTreeReader(columnId); - break; - default: - throw new IllegalArgumentException("Unsupported encoding " + - stripeFooter.getColumnsList().get(columnId).getKind()); - } - reader.startStripe(streams, stripeFooter); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - reader.seek(index); - } - - @Override - public void seek(PositionProvider index) throws IOException { - reader.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - return reader.next(previous); - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - return reader.nextVector(previousVector, batchSize); - } - - @Override - void skipRows(long items) throws IOException { - reader.skipRows(items); - } - } - - // This class collects together very similar methods for reading an ORC vector of byte arrays and - // creating the BytesColumnVector. - // - public static class BytesColumnVectorUtil { - - private static byte[] commonReadByteArrays(InStream stream, IntegerReader lengths, LongColumnVector scratchlcv, - BytesColumnVector result, long batchSize) throws IOException { - // Read lengths - scratchlcv.isNull = result.isNull; // Notice we are replacing the isNull vector here... - lengths.nextVector(scratchlcv, batchSize); - int totalLength = 0; - if (!scratchlcv.isRepeating) { - for (int i = 0; i < batchSize; i++) { - if (!scratchlcv.isNull[i]) { - totalLength += (int) scratchlcv.vector[i]; - } - } - } else { - if (!scratchlcv.isNull[0]) { - totalLength = (int) (batchSize * scratchlcv.vector[0]); - } - } - - // Read all the strings for this batch - byte[] allBytes = new byte[totalLength]; - int offset = 0; - int len = totalLength; - while (len > 0) { - int bytesRead = stream.read(allBytes, offset, len); - if (bytesRead < 0) { - throw new EOFException("Can't finish byte read from " + stream); - } - len -= bytesRead; - offset += bytesRead; - } - - return allBytes; - } - - // This method has the common code for reading in bytes into a BytesColumnVector. 
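/*
 * A minimal standalone sketch (not part of the patch) of the batched byte-array read pattern in
 * BytesColumnVectorUtil: total the per-row lengths, read one contiguous byte[] from the stream
 * (looping because read() may return fewer bytes than requested), then hand each row an
 * (offset, length) slice of that single buffer instead of allocating a per-row array.
 * The class, method, and parameter names below are illustrative placeholders, not Hive APIs.
 */
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

class BatchedByteArrayReadSketch {
  /** Reads batchSize variable-length values into one buffer; offsets[i] is row i's start. */
  static byte[] readBatch(InputStream in, long[] lengths, boolean[] isNull,
      int batchSize, int[] offsets) throws IOException {
    int total = 0;
    for (int i = 0; i < batchSize; i++) {
      if (!isNull[i]) {
        total += (int) lengths[i];
      }
    }
    byte[] all = new byte[total];
    int filled = 0;
    while (filled < total) {                      // read() may return a partial chunk
      int n = in.read(all, filled, total - filled);
      if (n < 0) {
        throw new EOFException("Stream ended before " + total + " bytes were read");
      }
      filled += n;
    }
    int offset = 0;
    for (int i = 0; i < batchSize; i++) {         // rows reference slices of the shared buffer
      offsets[i] = isNull[i] ? 0 : offset;
      if (!isNull[i]) {
        offset += (int) lengths[i];
      }
    }
    return all;
  }
}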
- public static void readOrcByteArrays(InStream stream, IntegerReader lengths, LongColumnVector scratchlcv, - BytesColumnVector result, long batchSize) throws IOException { - - byte[] allBytes = commonReadByteArrays(stream, lengths, scratchlcv, result, batchSize); - - // Too expensive to figure out 'repeating' by comparisons. - result.isRepeating = false; - int offset = 0; - if (!scratchlcv.isRepeating) { - for (int i = 0; i < batchSize; i++) { - if (!scratchlcv.isNull[i]) { - result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]); - offset += scratchlcv.vector[i]; - } else { - result.setRef(i, allBytes, 0, 0); - } - } - } else { - for (int i = 0; i < batchSize; i++) { - if (!scratchlcv.isNull[i]) { - result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]); - offset += scratchlcv.vector[0]; - } else { - result.setRef(i, allBytes, 0, 0); - } - } - } - } - } - - /** - * A reader for string columns that are direct encoded in the current - * stripe. - */ - public static class StringDirectTreeReader extends TreeReader { - public InStream stream; - public IntegerReader lengths; - private final LongColumnVector scratchlcv; - - StringDirectTreeReader(int columnId) throws IOException { - this(columnId, null, null, null, null); - } - - public StringDirectTreeReader(int columnId, InStream present, InStream data, InStream length, - OrcProto.ColumnEncoding.Kind encoding) throws IOException { - super(columnId, present); - this.scratchlcv = new LongColumnVector(); - this.stream = data; - if (length != null && encoding != null) { - this.lengths = createIntegerReader(encoding, length, false, false); - } - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT && - encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2) { - throw new IOException("Unknown encoding " + encoding + " in column " + - columnId); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - StreamName name = new StreamName(columnId, - OrcProto.Stream.Kind.DATA); - stream = streams.get(name); - lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), - false, false); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - stream.seek(index); - lengths.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - Text result = null; - if (valuePresent) { - if (previous == null) { - result = new Text(); - } else { - result = (Text) previous; - } - int len = (int) lengths.next(); - int offset = 0; - byte[] bytes = new byte[len]; - while (len > 0) { - int written = stream.read(bytes, offset, len); - if (written < 0) { - throw new EOFException("Can't finish byte read from " + stream); - } - len -= written; - offset += written; - } - result.set(bytes); - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - BytesColumnVector result = null; - if (previousVector == null) { - result = new BytesColumnVector(); - } else { - result = (BytesColumnVector) previousVector; - } - - // Read present/isNull stream - super.nextVector(result, batchSize); - - 
BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); - return result; - } - - @Override - void skipRows(long items) throws IOException { - items = countNonNulls(items); - long lengthToSkip = 0; - for(int i=0; i < items; ++i) { - lengthToSkip += lengths.next(); - } - stream.skip(lengthToSkip); - } - } - - /** - * A reader for string columns that are dictionary encoded in the current - * stripe. - */ - public static class StringDictionaryTreeReader extends TreeReader { - private DynamicByteArray dictionaryBuffer; - private int[] dictionaryOffsets; - public IntegerReader reader; - - private byte[] dictionaryBufferInBytesCache = null; - private final LongColumnVector scratchlcv; - - StringDictionaryTreeReader(int columnId) throws IOException { - this(columnId, null, null, null, null, null); - } - - public StringDictionaryTreeReader(int columnId, InStream present, InStream data, - InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding) - throws IOException{ - super(columnId, present); - scratchlcv = new LongColumnVector(); - if (data != null && encoding != null) { - this.reader = createIntegerReader(encoding.getKind(), data, false, false); - } - - if (dictionary != null && encoding != null) { - readDictionaryStream(dictionary); - } - - if (length != null && encoding != null) { - readDictionaryLengthStream(length, encoding); - } - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY && - encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) { - throw new IOException("Unknown encoding " + encoding + " in column " + - columnId); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - - // read the dictionary blob - StreamName name = new StreamName(columnId, - OrcProto.Stream.Kind.DICTIONARY_DATA); - InStream in = streams.get(name); - readDictionaryStream(in); - - // read the lengths - name = new StreamName(columnId, OrcProto.Stream.Kind.LENGTH); - in = streams.get(name); - readDictionaryLengthStream(in, stripeFooter.getColumnsList().get(columnId)); - - // set up the row reader - name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); - reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(name), false, false); - } - - private void readDictionaryLengthStream(InStream in, OrcProto.ColumnEncoding encoding) - throws IOException { - int dictionarySize = encoding.getDictionarySize(); - if (in != null) { // Guard against empty LENGTH stream. - IntegerReader lenReader = createIntegerReader(encoding.getKind(), in, false, false); - int offset = 0; - if (dictionaryOffsets == null || - dictionaryOffsets.length < dictionarySize + 1) { - dictionaryOffsets = new int[dictionarySize + 1]; - } - for (int i = 0; i < dictionarySize; ++i) { - dictionaryOffsets[i] = offset; - offset += (int) lenReader.next(); - } - dictionaryOffsets[dictionarySize] = offset; - in.close(); - } - - } - - private void readDictionaryStream(InStream in) throws IOException { - if (in != null) { // Guard against empty dictionary stream. - if (in.available() > 0) { - dictionaryBuffer = new DynamicByteArray(64, in.available()); - dictionaryBuffer.readAll(in); - // Since its start of strip invalidate the cache. 
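/*
 * A minimal standalone sketch (not part of the patch) of what readDictionaryLengthStream above
 * computes: the LENGTH stream holds one length per dictionary entry, and a running prefix sum
 * turns those lengths into an offsets array of size dictionarySize + 1, so entry i occupies
 * bytes [offsets[i], offsets[i + 1]) of the concatenated DICTIONARY_DATA blob.
 * Names below are illustrative, not Hive APIs.
 */
class DictionaryOffsetsSketch {
  static int[] offsetsFromLengths(int[] entryLengths) {
    int[] offsets = new int[entryLengths.length + 1];
    int offset = 0;
    for (int i = 0; i < entryLengths.length; i++) {
      offsets[i] = offset;                    // where entry i starts in the dictionary blob
      offset += entryLengths[i];
    }
    offsets[entryLengths.length] = offset;    // sentinel: total blob size
    return offsets;
  }
}
// Example: lengths {3, 0, 5} give offsets {0, 3, 3, 8}; entry 1 is the empty string.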
- dictionaryBufferInBytesCache = null; - } - in.close(); - } else { - dictionaryBuffer = null; - } - } - - @Override - void seek(PositionProvider[] index) throws IOException { - seek(index[columnId]); - } - - @Override - public void seek(PositionProvider index) throws IOException { - super.seek(index); - reader.seek(index); - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - Text result = null; - if (valuePresent) { - int entry = (int) reader.next(); - if (previous == null) { - result = new Text(); - } else { - result = (Text) previous; - } - int offset = dictionaryOffsets[entry]; - int length = getDictionaryEntryLength(entry, offset); - // If the column is just empty strings, the size will be zero, - // so the buffer will be null, in that case just return result - // as it will default to empty - if (dictionaryBuffer != null) { - dictionaryBuffer.setText(result, offset, length); - } else { - result.clear(); - } - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - BytesColumnVector result = null; - int offset = 0, length = 0; - if (previousVector == null) { - result = new BytesColumnVector(); - } else { - result = (BytesColumnVector) previousVector; - } - - // Read present/isNull stream - super.nextVector(result, batchSize); - - if (dictionaryBuffer != null) { - - // Load dictionaryBuffer into cache. - if (dictionaryBufferInBytesCache == null) { - dictionaryBufferInBytesCache = dictionaryBuffer.get(); - } - - // Read string offsets - scratchlcv.isNull = result.isNull; - reader.nextVector(scratchlcv, batchSize); - if (!scratchlcv.isRepeating) { - - // The vector has non-repeating strings. Iterate thru the batch - // and set strings one by one - for (int i = 0; i < batchSize; i++) { - if (!scratchlcv.isNull[i]) { - offset = dictionaryOffsets[(int) scratchlcv.vector[i]]; - length = getDictionaryEntryLength((int) scratchlcv.vector[i], offset); - result.setRef(i, dictionaryBufferInBytesCache, offset, length); - } else { - // If the value is null then set offset and length to zero (null string) - result.setRef(i, dictionaryBufferInBytesCache, 0, 0); - } - } - } else { - // If the value is repeating then just set the first value in the - // vector and set the isRepeating flag to true. No need to iterate thru and - // set all the elements to the same value - offset = dictionaryOffsets[(int) scratchlcv.vector[0]]; - length = getDictionaryEntryLength((int) scratchlcv.vector[0], offset); - result.setRef(0, dictionaryBufferInBytesCache, offset, length); - } - result.isRepeating = scratchlcv.isRepeating; - } else { - // Entire stripe contains null strings. - result.isRepeating = true; - result.noNulls = false; - result.isNull[0] = true; - result.setRef(0, "".getBytes(), 0, 0); - } - return result; - } - - int getDictionaryEntryLength(int entry, int offset) { - int length = 0; - // if it isn't the last entry, subtract the offsets otherwise use - // the buffer length. 
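/*
 * A minimal standalone sketch (not part of the patch) of the per-row dictionary lookup performed
 * by StringDictionaryTreeReader.nextVector above: the DATA stream yields a dictionary index per
 * row, and each non-null row is pointed at the slice of the shared dictionary blob given by the
 * offsets array (see the prefix-sum sketch earlier); null rows get a zero-length reference.
 * The Slice holder and method names are illustrative, not Hive APIs.
 */
class DictionaryLookupSketch {
  /** Row value holder: a slice of a shared byte[]; mirrors BytesColumnVector.setRef semantics. */
  static final class Slice {
    final byte[] buffer; final int start; final int length;
    Slice(byte[] buffer, int start, int length) {
      this.buffer = buffer; this.start = start; this.length = length;
    }
  }

  static Slice[] lookupBatch(byte[] dictionaryBlob, int[] offsets,
      int[] rowIndexes, boolean[] isNull, int batchSize) {
    Slice[] out = new Slice[batchSize];
    for (int i = 0; i < batchSize; i++) {
      if (isNull[i]) {
        out[i] = new Slice(dictionaryBlob, 0, 0);     // null row: empty reference
      } else {
        int entry = rowIndexes[i];
        int start = offsets[entry];
        int length = offsets[entry + 1] - start;      // sentinel entry covers the last dictionary item
        out[i] = new Slice(dictionaryBlob, start, length);
      }
    }
    return out;
  }
}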
- if (entry < dictionaryOffsets.length - 1) { - length = dictionaryOffsets[entry + 1] - offset; - } else { - length = dictionaryBuffer.size() - offset; - } - return length; - } - - @Override - void skipRows(long items) throws IOException { - reader.skip(countNonNulls(items)); - } - } - - public static class CharTreeReader extends StringTreeReader { - int maxLength; - - public CharTreeReader(int columnId, int maxLength) throws IOException { - this(columnId, maxLength, null, null, null, null, null); - } - - public CharTreeReader(int columnId, int maxLength, InStream present, InStream data, - InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException { - super(columnId, present, data, length, dictionary, encoding); - this.maxLength = maxLength; - } - - @Override - Object next(Object previous) throws IOException { - HiveCharWritable result = null; - if (previous == null) { - result = new HiveCharWritable(); - } else { - result = (HiveCharWritable) previous; - } - // Use the string reader implementation to populate the internal Text value - Object textVal = super.next(result.getTextValue()); - if (textVal == null) { - return null; - } - // result should now hold the value that was read in. - // enforce char length - result.enforceMaxLength(maxLength); - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - // Get the vector of strings from StringTreeReader, then make a 2nd pass to - // adjust down the length (right trim and truncate) if necessary. - BytesColumnVector result = (BytesColumnVector) super.nextVector(previousVector, batchSize); - - int adjustedDownLen; - if (result.isRepeating) { - if (result.noNulls || !result.isNull[0]) { - adjustedDownLen = StringExpr.rightTrimAndTruncate(result.vector[0], result.start[0], result.length[0], maxLength); - if (adjustedDownLen < result.length[0]) { - result.setRef(0, result.vector[0], result.start[0], adjustedDownLen); - } - } - } else { - if (result.noNulls){ - for (int i = 0; i < batchSize; i++) { - adjustedDownLen = StringExpr.rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i], maxLength); - if (adjustedDownLen < result.length[i]) { - result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); - } - } - } else { - for (int i = 0; i < batchSize; i++) { - if (!result.isNull[i]) { - adjustedDownLen = StringExpr.rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i], maxLength); - if (adjustedDownLen < result.length[i]) { - result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); - } - } - } - } - } - return result; - } - } - - public static class VarcharTreeReader extends StringTreeReader { - int maxLength; - - public VarcharTreeReader(int columnId, int maxLength) throws IOException { - this(columnId, maxLength, null, null, null, null, null); - } - - public VarcharTreeReader(int columnId, int maxLength, InStream present, InStream data, - InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException { - super(columnId, present, data, length, dictionary, encoding); - this.maxLength = maxLength; - } - - @Override - Object next(Object previous) throws IOException { - HiveVarcharWritable result = null; - if (previous == null) { - result = new HiveVarcharWritable(); - } else { - result = (HiveVarcharWritable) previous; - } - // Use the string reader implementation to populate the internal Text value - Object textVal = super.next(result.getTextValue()); - if (textVal 
== null) { - return null; - } - // result should now hold the value that was read in. - // enforce varchar length - result.enforceMaxLength(maxLength); - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - // Get the vector of strings from StringTreeReader, then make a 2nd pass to - // adjust down the length (truncate) if necessary. - BytesColumnVector result = (BytesColumnVector) super.nextVector(previousVector, batchSize); - - int adjustedDownLen; - if (result.isRepeating) { - if (result.noNulls || !result.isNull[0]) { - adjustedDownLen = StringExpr.truncate(result.vector[0], result.start[0], result.length[0], maxLength); - if (adjustedDownLen < result.length[0]) { - result.setRef(0, result.vector[0], result.start[0], adjustedDownLen); - } - } - } else { - if (result.noNulls){ - for (int i = 0; i < batchSize; i++) { - adjustedDownLen = StringExpr.truncate(result.vector[i], result.start[i], result.length[i], maxLength); - if (adjustedDownLen < result.length[i]) { - result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); - } - } - } else { - for (int i = 0; i < batchSize; i++) { - if (!result.isNull[i]) { - adjustedDownLen = StringExpr.truncate(result.vector[i], result.start[i], result.length[i], maxLength); - if (adjustedDownLen < result.length[i]) { - result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); - } - } - } - } - } - return result; - } - } - - private static class StructTreeReader extends TreeReader { - private final TreeReader[] fields; - private final String[] fieldNames; - private final List readers; - - StructTreeReader(int columnId, - List types, - boolean[] included, - boolean skipCorrupt) throws IOException { - super(columnId); - OrcProto.Type type = types.get(columnId); - int fieldCount = type.getFieldNamesCount(); - this.fields = new TreeReader[fieldCount]; - this.fieldNames = new String[fieldCount]; - this.readers = new ArrayList(); - for(int i=0; i < fieldCount; ++i) { - int subtype = type.getSubtypes(i); - if (included == null || included[subtype]) { - this.fields[i] = createTreeReader(subtype, types, included, skipCorrupt); - readers.add(this.fields[i]); - } - this.fieldNames[i] = type.getFieldNames(i); - } - } - - @Override - void seek(PositionProvider[] index) throws IOException { - super.seek(index); - for(TreeReader kid: fields) { - if (kid != null) { - kid.seek(index); - } - } - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - OrcStruct result = null; - if (valuePresent) { - if (previous == null) { - result = new OrcStruct(fields.length); - } else { - result = (OrcStruct) previous; - - // If the input format was initialized with a file with a - // different number of fields, the number of fields needs to - // be updated to the correct number - if (result.getNumFields() != fields.length) { - result.setNumFields(fields.length); - } - } - for(int i=0; i < fields.length; ++i) { - if (fields[i] != null) { - result.setFieldValue(i, fields[i].next(result.getFieldValue(i))); - } - } - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - ColumnVector[] result = null; - if (previousVector == null) { - result = new ColumnVector[fields.length]; - } else { - result = (ColumnVector[]) previousVector; - } - - // Read all the members of struct as column vectors - for (int i = 0; i < fields.length; i++) { - if (fields[i] != null) { - if (result[i] == null) { - 
result[i] = (ColumnVector) fields[i].nextVector(null, batchSize); - } else { - fields[i].nextVector(result[i], batchSize); - } - } - } - return result; - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - for(TreeReader field: fields) { - if (field != null) { - field.startStripe(streams, stripeFooter); - } - } - } - - @Override - void skipRows(long items) throws IOException { - items = countNonNulls(items); - for(TreeReader field: fields) { - if (field != null) { - field.skipRows(items); - } - } - } - } - - private static class UnionTreeReader extends TreeReader { - private final TreeReader[] fields; - private RunLengthByteReader tags; - - UnionTreeReader(int columnId, - List types, - boolean[] included, - boolean skipCorrupt) throws IOException { - super(columnId); - OrcProto.Type type = types.get(columnId); - int fieldCount = type.getSubtypesCount(); - this.fields = new TreeReader[fieldCount]; - for(int i=0; i < fieldCount; ++i) { - int subtype = type.getSubtypes(i); - if (included == null || included[subtype]) { - this.fields[i] = createTreeReader(subtype, types, included, skipCorrupt); - } - } - } - - @Override - void seek(PositionProvider[] index) throws IOException { - super.seek(index); - tags.seek(index[columnId]); - for(TreeReader kid: fields) { - kid.seek(index); - } - } - - @Override - Object next(Object previous) throws IOException { - super.next(previous); - OrcUnion result = null; - if (valuePresent) { - if (previous == null) { - result = new OrcUnion(); - } else { - result = (OrcUnion) previous; - } - byte tag = tags.next(); - Object previousVal = result.getObject(); - result.set(tag, fields[tag].next(tag == result.getTag() ? - previousVal : null)); - } - return result; - } - - @Override - public Object nextVector(Object previousVector, long batchSize) throws IOException { - throw new UnsupportedOperationException( - "NextVector is not supported operation for Union type"); - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - tags = new RunLengthByteReader(streams.get(new StreamName(columnId, - OrcProto.Stream.Kind.DATA))); - for(TreeReader field: fields) { - if (field != null) { - field.startStripe(streams, stripeFooter); - } - } - } - - @Override - void skipRows(long items) throws IOException { - items = countNonNulls(items); - long[] counts = new long[fields.length]; - for(int i=0; i < items; ++i) { - counts[tags.next()] += 1; - } - for(int i=0; i < counts.length; ++i) { - fields[i].skipRows(counts[i]); - } - } - } - - private static class ListTreeReader extends TreeReader { - private final TreeReader elementReader; - private IntegerReader lengths = null; - - ListTreeReader(int columnId, - List types, - boolean[] included, - boolean skipCorrupt) throws IOException { - super(columnId); - OrcProto.Type type = types.get(columnId); - elementReader = createTreeReader(type.getSubtypes(0), types, included, skipCorrupt); - } - - @Override - void seek(PositionProvider[] index) throws IOException { - super.seek(index); - lengths.seek(index[columnId]); - elementReader.seek(index); - } - - @Override - @SuppressWarnings("unchecked") - Object next(Object previous) throws IOException { - super.next(previous); - List result = null; - if (valuePresent) { - if (previous == null) { - result = new ArrayList(); - } else { - result = (ArrayList) previous; - } - int prevLength = 
result.size(); - int length = (int) lengths.next(); - // extend the list to the new length - for(int i=prevLength; i < length; ++i) { - result.add(null); - } - // read the new elements into the array - for(int i=0; i< length; i++) { - result.set(i, elementReader.next(i < prevLength ? - result.get(i) : null)); - } - // remove any extra elements - for(int i=prevLength - 1; i >= length; --i) { - result.remove(i); - } - } - return result; - } - - @Override - public Object nextVector(Object previous, long batchSize) throws IOException { - throw new UnsupportedOperationException( - "NextVector is not supported operation for List type"); - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && - (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { - throw new IOException("Unknown encoding " + encoding + " in column " + - columnId); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(new StreamName(columnId, - OrcProto.Stream.Kind.LENGTH)), false, false); - if (elementReader != null) { - elementReader.startStripe(streams, stripeFooter); - } - } - - @Override - void skipRows(long items) throws IOException { - items = countNonNulls(items); - long childSkip = 0; - for(long i=0; i < items; ++i) { - childSkip += lengths.next(); - } - elementReader.skipRows(childSkip); - } - } - - private static class MapTreeReader extends TreeReader { - private final TreeReader keyReader; - private final TreeReader valueReader; - private IntegerReader lengths = null; - - MapTreeReader(int columnId, - List types, - boolean[] included, - boolean skipCorrupt) throws IOException { - super(columnId); - OrcProto.Type type = types.get(columnId); - int keyColumn = type.getSubtypes(0); - int valueColumn = type.getSubtypes(1); - if (included == null || included[keyColumn]) { - keyReader = createTreeReader(keyColumn, types, included, skipCorrupt); - } else { - keyReader = null; - } - if (included == null || included[valueColumn]) { - valueReader = createTreeReader(valueColumn, types, included, skipCorrupt); - } else { - valueReader = null; - } - } - - @Override - void seek(PositionProvider[] index) throws IOException { - super.seek(index); - lengths.seek(index[columnId]); - keyReader.seek(index); - valueReader.seek(index); - } - - @Override - @SuppressWarnings("unchecked") - Object next(Object previous) throws IOException { - super.next(previous); - Map result = null; - if (valuePresent) { - if (previous == null) { - result = new LinkedHashMap(); - } else { - result = (LinkedHashMap) previous; - } - // for now just clear and create new objects - result.clear(); - int length = (int) lengths.next(); - // read the new elements into the array - for(int i=0; i< length; i++) { - result.put(keyReader.next(null), valueReader.next(null)); - } - } - return result; - } - - @Override - public Object nextVector(Object previous, long batchSize) throws IOException { - throw new UnsupportedOperationException( - "NextVector is not supported operation for Map type"); - } - - @Override - void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { - if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && - (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { - throw new IOException("Unknown 
encoding " + encoding + " in column " + - columnId); - } - } - - @Override - void startStripe(Map streams, - OrcProto.StripeFooter stripeFooter - ) throws IOException { - super.startStripe(streams, stripeFooter); - lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), - streams.get(new StreamName(columnId, - OrcProto.Stream.Kind.LENGTH)), false, false); - if (keyReader != null) { - keyReader.startStripe(streams, stripeFooter); - } - if (valueReader != null) { - valueReader.startStripe(streams, stripeFooter); - } - } - - @Override - void skipRows(long items) throws IOException { - items = countNonNulls(items); - long childSkip = 0; - for(long i=0; i < items; ++i) { - childSkip += lengths.next(); - } - keyReader.skipRows(childSkip); - valueReader.skipRows(childSkip); - } - } - - private static TreeReader createTreeReader(int columnId, - List types, - boolean[] included, - boolean skipCorrupt - ) throws IOException { - OrcProto.Type type = types.get(columnId); - switch (type.getKind()) { - case BOOLEAN: - return new BooleanTreeReader(columnId); - case BYTE: - return new ByteTreeReader(columnId); - case DOUBLE: - return new DoubleTreeReader(columnId); - case FLOAT: - return new FloatTreeReader(columnId); - case SHORT: - return new ShortTreeReader(columnId); - case INT: - return new IntTreeReader(columnId); - case LONG: - return new LongTreeReader(columnId, skipCorrupt); - case STRING: - return new StringTreeReader(columnId); - case CHAR: - if (!type.hasMaximumLength()) { - throw new IllegalArgumentException("ORC char type has no length specified"); - } - return new CharTreeReader(columnId, type.getMaximumLength()); - case VARCHAR: - if (!type.hasMaximumLength()) { - throw new IllegalArgumentException("ORC varchar type has no length specified"); - } - return new VarcharTreeReader(columnId, type.getMaximumLength()); - case BINARY: - return new BinaryTreeReader(columnId); - case TIMESTAMP: - return new TimestampTreeReader(columnId, skipCorrupt); - case DATE: - return new DateTreeReader(columnId); - case DECIMAL: - int precision = type.hasPrecision() ? type.getPrecision() : HiveDecimal.SYSTEM_DEFAULT_PRECISION; - int scale = type.hasScale()? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE; - return new DecimalTreeReader(columnId, precision, scale); - case STRUCT: - return new StructTreeReader(columnId, types, included, skipCorrupt); - case LIST: - return new ListTreeReader(columnId, types, included, skipCorrupt); - case MAP: - return new MapTreeReader(columnId, types, included, skipCorrupt); - case UNION: - return new UnionTreeReader(columnId, types, included, skipCorrupt); - default: - throw new IllegalArgumentException("Unsupported type " + - type.getKind()); - } - } - OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException { return metadata.readStripeFooter(stripe); } @@ -2665,10 +385,10 @@ result = evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull); } // in case failed conversion, return the default YES_NO_NULL truth value - } catch (NumberFormatException nfe) { + } catch (Exception e) { if (LOG.isWarnEnabled()) { - LOG.warn("NumberFormatException when type matching predicate object" + - " and statistics object. Exception: " + ExceptionUtils.getStackTrace(nfe)); + LOG.warn("Exception when evaluating predicate. Skipping ORC PPD." + + " Exception: " + ExceptionUtils.getStackTrace(e)); } result = hasNull ? 
TruthValue.YES_NO_NULL : TruthValue.YES_NO; } Index: ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java (working copy) @@ -0,0 +1,2338 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.orc; + +import java.io.EOFException; +import java.io.IOException; +import java.math.BigInteger; +import java.sql.Timestamp; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.TimeZone; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; + +/** + * Factory for creating ORC tree readers. 
+ */ +public class TreeReaderFactory { + + protected abstract static class TreeReader { + protected final int columnId; + protected BitFieldReader present = null; + protected boolean valuePresent = false; + + TreeReader(int columnId) throws IOException { + this(columnId, null); + } + + TreeReader(int columnId, InStream in) throws IOException { + this.columnId = columnId; + if (in == null) { + present = null; + valuePresent = true; + } else { + present = new BitFieldReader(in, 1); + } + } + + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) { + throw new IOException("Unknown encoding " + encoding + " in column " + + columnId); + } + } + + IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind, + InStream in, + boolean signed, boolean skipCorrupt) throws IOException { + switch (kind) { + case DIRECT_V2: + case DICTIONARY_V2: + return new RunLengthIntegerReaderV2(in, signed, skipCorrupt); + case DIRECT: + case DICTIONARY: + return new RunLengthIntegerReader(in, signed); + default: + throw new IllegalArgumentException("Unknown encoding " + kind); + } + } + + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + checkEncoding(stripeFooter.getColumnsList().get(columnId)); + InStream in = streams.get(new StreamName(columnId, + OrcProto.Stream.Kind.PRESENT)); + if (in == null) { + present = null; + valuePresent = true; + } else { + present = new BitFieldReader(in, 1); + } + } + + /** + * Seek to the given position. + * + * @param index the indexes loaded from the file + * @throws IOException + */ + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + public void seek(PositionProvider index) throws IOException { + if (present != null) { + present.seek(index); + } + } + + protected long countNonNulls(long rows) throws IOException { + if (present != null) { + long result = 0; + for (long c = 0; c < rows; ++c) { + if (present.next() == 1) { + result += 1; + } + } + return result; + } else { + return rows; + } + } + + abstract void skipRows(long rows) throws IOException; + + Object next(Object previous) throws IOException { + if (present != null) { + valuePresent = present.next() == 1; + } + return previous; + } + + /** + * Populates the isNull vector array in the previousVector object based on + * the present stream values. This function is called from all the child + * readers, and they all set the values based on isNull field value. + * + * @param previousVector The columnVector object whose isNull value is populated + * @param batchSize Size of the column vector + * @return next column vector + * @throws IOException + */ + public Object nextVector(Object previousVector, long batchSize) throws IOException { + ColumnVector result = (ColumnVector) previousVector; + if (present != null) { + // Set noNulls and isNull vector of the ColumnVector based on + // present stream + result.noNulls = true; + for (int i = 0; i < batchSize; i++) { + result.isNull[i] = (present.next() != 1); + if (result.noNulls && result.isNull[i]) { + result.noNulls = false; + } + } + } else { + // There is not present stream, this means that all the values are + // present. 
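/*
 * A minimal standalone sketch (not part of the patch) of the PRESENT-stream handling in the base
 * TreeReader.nextVector: one bit per row says whether the value exists, isNull[i] is the inverse
 * of that bit, and noNulls is cleared as soon as any null is seen; a column with no PRESENT
 * stream has every row non-null. The BitSupplier interface stands in for Hive's BitFieldReader
 * and is illustrative only.
 */
class PresentStreamSketch {
  interface BitSupplier {               // stand-in for BitFieldReader.next()
    int next();
  }

  static boolean fillIsNull(BitSupplier present, boolean[] isNull, int batchSize) {
    boolean noNulls = true;
    if (present == null) {              // no PRESENT stream: all values are present
      for (int i = 0; i < batchSize; i++) {
        isNull[i] = false;
      }
    } else {
      for (int i = 0; i < batchSize; i++) {
        isNull[i] = present.next() != 1;
        if (isNull[i]) {
          noNulls = false;
        }
      }
    }
    return noNulls;                     // caller stores this in ColumnVector.noNulls
  }
}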
+ result.noNulls = true; + for (int i = 0; i < batchSize; i++) { + result.isNull[i] = false; + } + } + return previousVector; + } + } + + protected static class BooleanTreeReader extends TreeReader { + protected BitFieldReader reader = null; + + BooleanTreeReader(int columnId) throws IOException { + this(columnId, null, null); + } + + BooleanTreeReader(int columnId, InStream present, InStream data) throws IOException { + super(columnId, present); + if (data != null) { + reader = new BitFieldReader(data, 1); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + reader = new BitFieldReader(streams.get(new StreamName(columnId, + OrcProto.Stream.Kind.DATA)), 1); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + reader.seek(index); + } + + @Override + void skipRows(long items) throws IOException { + reader.skip(countNonNulls(items)); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + BooleanWritable result = null; + if (valuePresent) { + if (previous == null) { + result = new BooleanWritable(); + } else { + result = (BooleanWritable) previous; + } + result.set(reader.next() == 1); + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final LongColumnVector result; + if (previousVector == null) { + result = new LongColumnVector(); + } else { + result = (LongColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + // Read value entries based on isNull entries + reader.nextVector(result, batchSize); + return result; + } + } + + protected static class ByteTreeReader extends TreeReader { + protected RunLengthByteReader reader = null; + + ByteTreeReader(int columnId) throws IOException { + this(columnId, null, null); + } + + ByteTreeReader(int columnId, InStream present, InStream data) throws IOException { + super(columnId, present); + this.reader = new RunLengthByteReader(data); + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + reader = new RunLengthByteReader(streams.get(new StreamName(columnId, + OrcProto.Stream.Kind.DATA))); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + reader.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + ByteWritable result = null; + if (valuePresent) { + if (previous == null) { + result = new ByteWritable(); + } else { + result = (ByteWritable) previous; + } + result.set(reader.next()); + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final LongColumnVector result; + if (previousVector == null) { + result = new LongColumnVector(); + } else { + result = (LongColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + // Read value entries based on isNull entries + reader.nextVector(result, batchSize); + return result; + } + + @Override + void skipRows(long items) throws IOException { + 
reader.skip(countNonNulls(items)); + } + } + + protected static class ShortTreeReader extends TreeReader { + protected IntegerReader reader = null; + + ShortTreeReader(int columnId) throws IOException { + this(columnId, null, null, null); + } + + ShortTreeReader(int columnId, InStream present, InStream data, + OrcProto.ColumnEncoding encoding) + throws IOException { + super(columnId, present); + if (data != null && encoding != null) { + checkEncoding(encoding); + this.reader = createIntegerReader(encoding.getKind(), data, true, false); + } + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && + (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { + throw new IOException("Unknown encoding " + encoding + " in column " + + columnId); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + StreamName name = new StreamName(columnId, + OrcProto.Stream.Kind.DATA); + reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(name), true, false); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + reader.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + ShortWritable result = null; + if (valuePresent) { + if (previous == null) { + result = new ShortWritable(); + } else { + result = (ShortWritable) previous; + } + result.set((short) reader.next()); + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final LongColumnVector result; + if (previousVector == null) { + result = new LongColumnVector(); + } else { + result = (LongColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + // Read value entries based on isNull entries + reader.nextVector(result, batchSize); + return result; + } + + @Override + void skipRows(long items) throws IOException { + reader.skip(countNonNulls(items)); + } + } + + protected static class IntTreeReader extends TreeReader { + protected IntegerReader reader = null; + + IntTreeReader(int columnId) throws IOException { + this(columnId, null, null, null); + } + + IntTreeReader(int columnId, InStream present, InStream data, + OrcProto.ColumnEncoding encoding) + throws IOException { + super(columnId, present); + if (data != null && encoding != null) { + checkEncoding(encoding); + this.reader = createIntegerReader(encoding.getKind(), data, true, false); + } + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && + (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { + throw new IOException("Unknown encoding " + encoding + " in column " + + columnId); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + StreamName name = new StreamName(columnId, + OrcProto.Stream.Kind.DATA); + reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(name), true, false); + } + + @Override + void seek(PositionProvider[] index) throws 
IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + reader.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + IntWritable result = null; + if (valuePresent) { + if (previous == null) { + result = new IntWritable(); + } else { + result = (IntWritable) previous; + } + result.set((int) reader.next()); + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final LongColumnVector result; + if (previousVector == null) { + result = new LongColumnVector(); + } else { + result = (LongColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + // Read value entries based on isNull entries + reader.nextVector(result, batchSize); + return result; + } + + @Override + void skipRows(long items) throws IOException { + reader.skip(countNonNulls(items)); + } + } + + protected static class LongTreeReader extends TreeReader { + protected IntegerReader reader = null; + + LongTreeReader(int columnId, boolean skipCorrupt) throws IOException { + this(columnId, null, null, null, skipCorrupt); + } + + LongTreeReader(int columnId, InStream present, InStream data, + OrcProto.ColumnEncoding encoding, + boolean skipCorrupt) + throws IOException { + super(columnId, present); + if (data != null && encoding != null) { + checkEncoding(encoding); + this.reader = createIntegerReader(encoding.getKind(), data, true, skipCorrupt); + } + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && + (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { + throw new IOException("Unknown encoding " + encoding + " in column " + + columnId); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + StreamName name = new StreamName(columnId, + OrcProto.Stream.Kind.DATA); + reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(name), true, false); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + reader.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + LongWritable result = null; + if (valuePresent) { + if (previous == null) { + result = new LongWritable(); + } else { + result = (LongWritable) previous; + } + result.set(reader.next()); + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final LongColumnVector result; + if (previousVector == null) { + result = new LongColumnVector(); + } else { + result = (LongColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + // Read value entries based on isNull entries + reader.nextVector(result, batchSize); + return result; + } + + @Override + void skipRows(long items) throws IOException { + reader.skip(countNonNulls(items)); + } + } + + protected static class FloatTreeReader extends TreeReader { + protected InStream stream; + private final SerializationUtils utils; + + FloatTreeReader(int columnId) throws IOException { + 
this(columnId, null, null); + } + + FloatTreeReader(int columnId, InStream present, InStream data) throws IOException { + super(columnId, present); + this.utils = new SerializationUtils(); + this.stream = data; + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + StreamName name = new StreamName(columnId, + OrcProto.Stream.Kind.DATA); + stream = streams.get(name); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + stream.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + FloatWritable result = null; + if (valuePresent) { + if (previous == null) { + result = new FloatWritable(); + } else { + result = (FloatWritable) previous; + } + result.set(utils.readFloat(stream)); + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final DoubleColumnVector result; + if (previousVector == null) { + result = new DoubleColumnVector(); + } else { + result = (DoubleColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + // Read value entries based on isNull entries + for (int i = 0; i < batchSize; i++) { + if (!result.isNull[i]) { + result.vector[i] = utils.readFloat(stream); + } else { + + // If the value is not present then set NaN + result.vector[i] = Double.NaN; + } + } + + // Set isRepeating flag + result.isRepeating = true; + for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) { + if (result.vector[i] != result.vector[i + 1]) { + result.isRepeating = false; + } + } + return result; + } + + @Override + protected void skipRows(long items) throws IOException { + items = countNonNulls(items); + for (int i = 0; i < items; ++i) { + utils.readFloat(stream); + } + } + } + + protected static class DoubleTreeReader extends TreeReader { + protected InStream stream; + private final SerializationUtils utils; + + DoubleTreeReader(int columnId) throws IOException { + this(columnId, null, null); + } + + DoubleTreeReader(int columnId, InStream present, InStream data) throws IOException { + super(columnId, present); + this.utils = new SerializationUtils(); + this.stream = data; + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + StreamName name = + new StreamName(columnId, + OrcProto.Stream.Kind.DATA); + stream = streams.get(name); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + stream.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + DoubleWritable result = null; + if (valuePresent) { + if (previous == null) { + result = new DoubleWritable(); + } else { + result = (DoubleWritable) previous; + } + result.set(utils.readDouble(stream)); + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final DoubleColumnVector result; + if (previousVector == null) { + result = new DoubleColumnVector(); + } else { + result = (DoubleColumnVector) previousVector; + } + + // Read present/isNull 
stream + super.nextVector(result, batchSize); + + // Read value entries based on isNull entries + for (int i = 0; i < batchSize; i++) { + if (!result.isNull[i]) { + result.vector[i] = utils.readDouble(stream); + } else { + // If the value is not present then set NaN + result.vector[i] = Double.NaN; + } + } + + // Set isRepeating flag + result.isRepeating = true; + for (int i = 0; (i < batchSize - 1 && result.isRepeating); i++) { + if (result.vector[i] != result.vector[i + 1]) { + result.isRepeating = false; + } + } + return result; + } + + @Override + void skipRows(long items) throws IOException { + items = countNonNulls(items); + long len = items * 8; + while (len > 0) { + len -= stream.skip(len); + } + } + } + + protected static class BinaryTreeReader extends TreeReader { + protected InStream stream; + protected IntegerReader lengths = null; + protected final LongColumnVector scratchlcv; + + BinaryTreeReader(int columnId) throws IOException { + this(columnId, null, null, null, null); + } + + BinaryTreeReader(int columnId, InStream present, InStream data, InStream length, + OrcProto.ColumnEncoding encoding) throws IOException { + super(columnId, present); + scratchlcv = new LongColumnVector(); + this.stream = data; + if (length != null && encoding != null) { + checkEncoding(encoding); + this.lengths = createIntegerReader(encoding.getKind(), length, false, false); + } + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && + (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { + throw new IOException("Unknown encoding " + encoding + " in column " + + columnId); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + StreamName name = new StreamName(columnId, + OrcProto.Stream.Kind.DATA); + stream = streams.get(name); + lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + stream.seek(index); + lengths.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + BytesWritable result = null; + if (valuePresent) { + if (previous == null) { + result = new BytesWritable(); + } else { + result = (BytesWritable) previous; + } + int len = (int) lengths.next(); + result.setSize(len); + int offset = 0; + while (len > 0) { + int written = stream.read(result.getBytes(), offset, len); + if (written < 0) { + throw new EOFException("Can't finish byte read from " + stream); + } + len -= written; + offset += written; + } + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final BytesColumnVector result; + if (previousVector == null) { + result = new BytesColumnVector(); + } else { + result = (BytesColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); + return result; + } + + @Override + void skipRows(long items) throws IOException { + items = countNonNulls(items); + long lengthToSkip = 0; + for (int i = 
0; i < items; ++i) { + lengthToSkip += lengths.next(); + } + while (lengthToSkip > 0) { + lengthToSkip -= stream.skip(lengthToSkip); + } + } + } + + protected static class TimestampTreeReader extends TreeReader { + protected IntegerReader data = null; + protected IntegerReader nanos = null; + private final boolean skipCorrupt; + private Map baseTimestampMap; + private long base_timestamp; + private final TimeZone readerTimeZone; + private TimeZone writerTimeZone; + private boolean hasSameTZRules; + + TimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException { + this(columnId, null, null, null, null, skipCorrupt); + } + + TimestampTreeReader(int columnId, InStream presentStream, InStream dataStream, + InStream nanosStream, OrcProto.ColumnEncoding encoding, boolean skipCorrupt) + throws IOException { + super(columnId, presentStream); + this.skipCorrupt = skipCorrupt; + this.baseTimestampMap = new HashMap<>(); + this.readerTimeZone = TimeZone.getDefault(); + this.writerTimeZone = readerTimeZone; + this.hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone); + this.base_timestamp = getBaseTimestamp(readerTimeZone.getID()); + if (encoding != null) { + checkEncoding(encoding); + + if (dataStream != null) { + this.data = createIntegerReader(encoding.getKind(), dataStream, true, skipCorrupt); + } + + if (nanosStream != null) { + this.nanos = createIntegerReader(encoding.getKind(), nanosStream, false, skipCorrupt); + } + } + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && + (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { + throw new IOException("Unknown encoding " + encoding + " in column " + + columnId); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + data = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(new StreamName(columnId, + OrcProto.Stream.Kind.DATA)), true, skipCorrupt); + nanos = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(new StreamName(columnId, + OrcProto.Stream.Kind.SECONDARY)), false, skipCorrupt); + base_timestamp = getBaseTimestamp(stripeFooter.getWriterTimezone()); + } + + private long getBaseTimestamp(String timeZoneId) throws IOException { + // to make sure new readers read old files in the same way + if (timeZoneId == null || timeZoneId.isEmpty()) { + timeZoneId = readerTimeZone.getID(); + } + + if (!baseTimestampMap.containsKey(timeZoneId)) { + writerTimeZone = TimeZone.getTimeZone(timeZoneId); + hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone); + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + sdf.setTimeZone(writerTimeZone); + try { + long epoch = + sdf.parse(WriterImpl.BASE_TIMESTAMP_STRING).getTime() / WriterImpl.MILLIS_PER_SECOND; + baseTimestampMap.put(timeZoneId, epoch); + return epoch; + } catch (ParseException e) { + throw new IOException("Unable to create base timestamp", e); + } finally { + sdf.setTimeZone(readerTimeZone); + } + } + + return baseTimestampMap.get(timeZoneId); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + data.seek(index); + nanos.seek(index); + } + + @Override + Object next(Object previous) throws IOException { 
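TimestampTreeReader splits a timestamp into seconds relative to a base epoch (WriterImpl.BASE_TIMESTAMP_STRING interpreted in the writer's time zone) and a nanosecond value whose trailing zeros are compressed into its low three bits (see parseNanos later in this hunk). The sketch below shows both halves with plain JDK classes; it assumes the base string is "2015-01-01 00:00:00" and is illustrative only.

import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.TimeZone;

/**
 * Illustrative sketch only: per-writer-time-zone base epoch plus the
 * SECONDARY stream's trailing-zero nanosecond compression.
 */
public class TimestampDecodeSketch {

  static long baseSeconds(String timeZoneId) throws Exception {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    sdf.setTimeZone(TimeZone.getTimeZone(timeZoneId));
    // The same wall-clock base string yields a different epoch per writer zone.
    return sdf.parse("2015-01-01 00:00:00").getTime() / 1000L;
  }

  /** Same decoding as parseNanos: a non-zero low-3-bit field means multiply by 10^(zeros+1). */
  static int parseNanos(long serialized) {
    int zeros = 7 & (int) serialized;
    int result = (int) (serialized >>> 3);
    if (zeros != 0) {
      for (int i = 0; i <= zeros; ++i) {
        result *= 10;
      }
    }
    return result;
  }

  public static void main(String[] args) throws Exception {
    System.out.println(baseSeconds("UTC"));                  // 1420070400
    System.out.println(baseSeconds("America/Los_Angeles"));  // 1420099200 (PST midnight is 8h later)

    // Mantissa 12345 with zeros field 3 decodes to 12345 * 10^4 = 123450000 ns.
    long serialized = (12345L << 3) | 3;
    System.out.println(parseNanos(serialized));              // 123450000

    // Reassemble a timestamp: seconds-from-base plus decoded nanos
    // (printed in the JVM's default zone).
    long base = baseSeconds("UTC");
    long secondsFromBase = 86400;                            // one day after the base
    Timestamp ts = new Timestamp((base + secondsFromBase) * 1000L);
    ts.setNanos(parseNanos(serialized));
    System.out.println(ts);
  }
}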
+ super.next(previous); + TimestampWritable result = null; + if (valuePresent) { + if (previous == null) { + result = new TimestampWritable(); + } else { + result = (TimestampWritable) previous; + } + long millis = (data.next() + base_timestamp) * WriterImpl.MILLIS_PER_SECOND; + int newNanos = parseNanos(nanos.next()); + // fix the rounding when we divided by 1000. + if (millis >= 0) { + millis += newNanos / 1000000; + } else { + millis -= newNanos / 1000000; + } + long offset = 0; + // If reader and writer time zones have different rules, adjust the timezone difference + // between reader and writer taking day light savings into account. + if (!hasSameTZRules) { + offset = writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(millis); + } + long adjustedMillis = millis + offset; + Timestamp ts = new Timestamp(adjustedMillis); + // Sometimes the reader timezone might have changed after adding the adjustedMillis. + // To account for that change, check for any difference in reader timezone after + // adding adjustedMillis. If so use the new offset (offset at adjustedMillis point of time). + if (!hasSameTZRules && + (readerTimeZone.getOffset(millis) != readerTimeZone.getOffset(adjustedMillis))) { + long newOffset = + writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(adjustedMillis); + adjustedMillis = millis + newOffset; + ts.setTime(adjustedMillis); + } + ts.setNanos(newNanos); + result.set(ts); + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final LongColumnVector result; + if (previousVector == null) { + result = new LongColumnVector(); + } else { + result = (LongColumnVector) previousVector; + } + + result.reset(); + Object obj = null; + for (int i = 0; i < batchSize; i++) { + obj = next(obj); + if (obj == null) { + result.noNulls = false; + result.isNull[i] = true; + } else { + TimestampWritable writable = (TimestampWritable) obj; + Timestamp timestamp = writable.getTimestamp(); + result.vector[i] = TimestampUtils.getTimeNanoSec(timestamp); + } + } + + return result; + } + + private static int parseNanos(long serialized) { + int zeros = 7 & (int) serialized; + int result = (int) (serialized >>> 3); + if (zeros != 0) { + for (int i = 0; i <= zeros; ++i) { + result *= 10; + } + } + return result; + } + + @Override + void skipRows(long items) throws IOException { + items = countNonNulls(items); + data.skip(items); + nanos.skip(items); + } + } + + protected static class DateTreeReader extends TreeReader { + protected IntegerReader reader = null; + + DateTreeReader(int columnId) throws IOException { + this(columnId, null, null, null); + } + + DateTreeReader(int columnId, InStream present, InStream data, + OrcProto.ColumnEncoding encoding) throws IOException { + super(columnId, present); + if (data != null && encoding != null) { + checkEncoding(encoding); + reader = createIntegerReader(encoding.getKind(), data, true, false); + } + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && + (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { + throw new IOException("Unknown encoding " + encoding + " in column " + + columnId); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + StreamName name = new StreamName(columnId, + OrcProto.Stream.Kind.DATA); + reader = 
createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(name), true, false); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + reader.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + DateWritable result = null; + if (valuePresent) { + if (previous == null) { + result = new DateWritable(); + } else { + result = (DateWritable) previous; + } + result.set((int) reader.next()); + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final LongColumnVector result; + if (previousVector == null) { + result = new LongColumnVector(); + } else { + result = (LongColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + // Read value entries based on isNull entries + reader.nextVector(result, batchSize); + return result; + } + + @Override + void skipRows(long items) throws IOException { + reader.skip(countNonNulls(items)); + } + } + + protected static class DecimalTreeReader extends TreeReader { + protected InStream valueStream; + protected IntegerReader scaleReader = null; + private LongColumnVector scratchScaleVector; + + private final int precision; + private final int scale; + + DecimalTreeReader(int columnId, int precision, int scale) throws IOException { + this(columnId, precision, scale, null, null, null, null); + } + + DecimalTreeReader(int columnId, int precision, int scale, InStream present, + InStream valueStream, InStream scaleStream, OrcProto.ColumnEncoding encoding) + throws IOException { + super(columnId, present); + this.precision = precision; + this.scale = scale; + this.scratchScaleVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + this.valueStream = valueStream; + if (scaleStream != null && encoding != null) { + checkEncoding(encoding); + this.scaleReader = createIntegerReader(encoding.getKind(), scaleStream, true, false); + } + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && + (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { + throw new IOException("Unknown encoding " + encoding + " in column " + + columnId); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + valueStream = streams.get(new StreamName(columnId, + OrcProto.Stream.Kind.DATA)); + scaleReader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), true, false); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + valueStream.seek(index); + scaleReader.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + final HiveDecimalWritable result; + if (valuePresent) { + if (previous == null) { + result = new HiveDecimalWritable(); + } else { + result = (HiveDecimalWritable) previous; + } + result.set(HiveDecimal.create(SerializationUtils.readBigInteger(valueStream), + (int) scaleReader.next())); + return 
HiveDecimalUtils.enforcePrecisionScale(result, precision, scale); + } + return null; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final DecimalColumnVector result; + if (previousVector == null) { + result = new DecimalColumnVector(precision, scale); + } else { + result = (DecimalColumnVector) previousVector; + } + + // Save the reference for isNull in the scratch vector + boolean[] scratchIsNull = scratchScaleVector.isNull; + + // Read present/isNull stream + super.nextVector(result, batchSize); + + // Read value entries based on isNull entries + if (result.isRepeating) { + if (!result.isNull[0]) { + BigInteger bInt = SerializationUtils.readBigInteger(valueStream); + short scaleInData = (short) scaleReader.next(); + HiveDecimal dec = HiveDecimal.create(bInt, scaleInData); + dec = HiveDecimalUtils.enforcePrecisionScale(dec, precision, scale); + result.set(0, dec); + } + } else { + // result vector has isNull values set, use the same to read scale vector. + scratchScaleVector.isNull = result.isNull; + scaleReader.nextVector(scratchScaleVector, batchSize); + for (int i = 0; i < batchSize; i++) { + if (!result.isNull[i]) { + BigInteger bInt = SerializationUtils.readBigInteger(valueStream); + short scaleInData = (short) scratchScaleVector.vector[i]; + HiveDecimal dec = HiveDecimal.create(bInt, scaleInData); + dec = HiveDecimalUtils.enforcePrecisionScale(dec, precision, scale); + result.set(i, dec); + } + } + } + // Switch back the null vector. + scratchScaleVector.isNull = scratchIsNull; + return result; + } + + @Override + void skipRows(long items) throws IOException { + items = countNonNulls(items); + for (int i = 0; i < items; i++) { + SerializationUtils.readBigInteger(valueStream); + } + scaleReader.skip(items); + } + } + + /** + * A tree reader that will read string columns. At the start of the + * stripe, it creates an internal reader based on whether a direct or + * dictionary encoding was used. 
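DecimalTreeReader pairs an unbounded unscaled integer from the DATA stream with a per-value scale from the SECONDARY stream and then clamps the result to the declared precision and scale. The sketch below models that with BigDecimal; the enforcePrecisionScale shown is a simplified stand-in for HiveDecimalUtils.enforcePrecisionScale, not the Hive implementation.

import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;

/**
 * Illustrative sketch only: unscaled value + scale stream, followed by a
 * simplified precision/scale enforcement.
 */
public class DecimalDecodeSketch {

  /** Round to the declared scale; reject values whose integer part no longer fits. */
  static BigDecimal enforcePrecisionScale(BigDecimal dec, int precision, int scale) {
    BigDecimal adjusted = dec.setScale(scale, RoundingMode.HALF_UP);
    // Digits left of the decimal point may not exceed precision - scale.
    if (adjusted.precision() - adjusted.scale() > precision - scale) {
      return null;   // modeled here as "value dropped"
    }
    return adjusted;
  }

  public static void main(String[] args) {
    // As if the streams produced unscaled value 1234567 with scale 4 -> 123.4567
    BigDecimal fromStreams = new BigDecimal(BigInteger.valueOf(1234567), 4);

    System.out.println(enforcePrecisionScale(fromStreams, 10, 2));  // 123.46 (rounded to scale 2)
    System.out.println(enforcePrecisionScale(fromStreams, 4, 2));   // null (needs 3 integer digits, only 2 allowed)
  }
}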
+ */ + protected static class StringTreeReader extends TreeReader { + protected TreeReader reader; + + StringTreeReader(int columnId) throws IOException { + super(columnId); + } + + StringTreeReader(int columnId, InStream present, InStream data, InStream length, + InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException { + super(columnId, present); + if (encoding != null) { + switch (encoding.getKind()) { + case DIRECT: + case DIRECT_V2: + reader = new StringDirectTreeReader(columnId, present, data, length, + encoding.getKind()); + break; + case DICTIONARY: + case DICTIONARY_V2: + reader = new StringDictionaryTreeReader(columnId, present, data, length, dictionary, + encoding); + break; + default: + throw new IllegalArgumentException("Unsupported encoding " + + encoding.getKind()); + } + } + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + reader.checkEncoding(encoding); + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + // For each stripe, checks the encoding and initializes the appropriate + // reader + switch (stripeFooter.getColumnsList().get(columnId).getKind()) { + case DIRECT: + case DIRECT_V2: + reader = new StringDirectTreeReader(columnId); + break; + case DICTIONARY: + case DICTIONARY_V2: + reader = new StringDictionaryTreeReader(columnId); + break; + default: + throw new IllegalArgumentException("Unsupported encoding " + + stripeFooter.getColumnsList().get(columnId).getKind()); + } + reader.startStripe(streams, stripeFooter); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + reader.seek(index); + } + + @Override + public void seek(PositionProvider index) throws IOException { + reader.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + return reader.next(previous); + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + return reader.nextVector(previousVector, batchSize); + } + + @Override + void skipRows(long items) throws IOException { + reader.skipRows(items); + } + } + + // This class collects together very similar methods for reading an ORC vector of byte arrays and + // creating the BytesColumnVector. + // + public static class BytesColumnVectorUtil { + + private static byte[] commonReadByteArrays(InStream stream, IntegerReader lengths, + LongColumnVector scratchlcv, + BytesColumnVector result, long batchSize) throws IOException { + // Read lengths + scratchlcv.isNull = result.isNull; // Notice we are replacing the isNull vector here... + lengths.nextVector(scratchlcv, batchSize); + int totalLength = 0; + if (!scratchlcv.isRepeating) { + for (int i = 0; i < batchSize; i++) { + if (!scratchlcv.isNull[i]) { + totalLength += (int) scratchlcv.vector[i]; + } + } + } else { + if (!scratchlcv.isNull[0]) { + totalLength = (int) (batchSize * scratchlcv.vector[0]); + } + } + + // Read all the strings for this batch + byte[] allBytes = new byte[totalLength]; + int offset = 0; + int len = totalLength; + while (len > 0) { + int bytesRead = stream.read(allBytes, offset, len); + if (bytesRead < 0) { + throw new EOFException("Can't finish byte read from " + stream); + } + len -= bytesRead; + offset += bytesRead; + } + + return allBytes; + } + + // This method has the common code for reading in bytes into a BytesColumnVector. 
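StringTreeReader above is only a facade: at every startStripe it inspects the column encoding and re-creates either a direct or a dictionary delegate, then forwards every call to it. A minimal sketch of that per-stripe delegation, with hypothetical types standing in for OrcProto.ColumnEncoding.Kind and the concrete readers:

/**
 * Illustrative sketch only: per-stripe selection of a delegate reader.
 */
public class DelegatingStringReaderSketch {

  enum Encoding { DIRECT, DIRECT_V2, DICTIONARY, DICTIONARY_V2 }

  interface StripeStringReader {
    String next();
  }

  static class DirectReader implements StripeStringReader {
    public String next() { return "read from DATA + LENGTH streams"; }
  }

  static class DictionaryReader implements StripeStringReader {
    public String next() { return "index into DICTIONARY_DATA via DATA stream"; }
  }

  private StripeStringReader reader;

  /** Mirrors startStripe(): the delegate is re-created for every stripe. */
  void startStripe(Encoding stripeEncoding) {
    switch (stripeEncoding) {
      case DIRECT:
      case DIRECT_V2:
        reader = new DirectReader();
        break;
      case DICTIONARY:
      case DICTIONARY_V2:
        reader = new DictionaryReader();
        break;
      default:
        throw new IllegalArgumentException("Unsupported encoding " + stripeEncoding);
    }
  }

  String next() { return reader.next(); }   // every other call delegates the same way

  public static void main(String[] args) {
    DelegatingStringReaderSketch r = new DelegatingStringReaderSketch();
    r.startStripe(Encoding.DICTIONARY_V2);
    System.out.println(r.next());
    r.startStripe(Encoding.DIRECT);          // the next stripe may switch encodings
    System.out.println(r.next());
  }
}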
+ public static void readOrcByteArrays(InStream stream, IntegerReader lengths, + LongColumnVector scratchlcv, + BytesColumnVector result, long batchSize) throws IOException { + + byte[] allBytes = commonReadByteArrays(stream, lengths, scratchlcv, result, batchSize); + + // Too expensive to figure out 'repeating' by comparisons. + result.isRepeating = false; + int offset = 0; + if (!scratchlcv.isRepeating) { + for (int i = 0; i < batchSize; i++) { + if (!scratchlcv.isNull[i]) { + result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]); + offset += scratchlcv.vector[i]; + } else { + result.setRef(i, allBytes, 0, 0); + } + } + } else { + for (int i = 0; i < batchSize; i++) { + if (!scratchlcv.isNull[i]) { + result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]); + offset += scratchlcv.vector[0]; + } else { + result.setRef(i, allBytes, 0, 0); + } + } + } + } + } + + /** + * A reader for string columns that are direct encoded in the current + * stripe. + */ + protected static class StringDirectTreeReader extends TreeReader { + protected InStream stream; + protected IntegerReader lengths; + private final LongColumnVector scratchlcv; + + StringDirectTreeReader(int columnId) throws IOException { + this(columnId, null, null, null, null); + } + + StringDirectTreeReader(int columnId, InStream present, InStream data, InStream length, + OrcProto.ColumnEncoding.Kind encoding) throws IOException { + super(columnId, present); + this.scratchlcv = new LongColumnVector(); + this.stream = data; + if (length != null && encoding != null) { + this.lengths = createIntegerReader(encoding, length, false, false); + } + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT && + encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2) { + throw new IOException("Unknown encoding " + encoding + " in column " + + columnId); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + StreamName name = new StreamName(columnId, + OrcProto.Stream.Kind.DATA); + stream = streams.get(name); + lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), + false, false); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + stream.seek(index); + lengths.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + Text result = null; + if (valuePresent) { + if (previous == null) { + result = new Text(); + } else { + result = (Text) previous; + } + int len = (int) lengths.next(); + int offset = 0; + byte[] bytes = new byte[len]; + while (len > 0) { + int written = stream.read(bytes, offset, len); + if (written < 0) { + throw new EOFException("Can't finish byte read from " + stream); + } + len -= written; + offset += written; + } + result.set(bytes); + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final BytesColumnVector result; + if (previousVector == null) { + result = new BytesColumnVector(); + } else { + result = (BytesColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + 
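readOrcByteArrays avoids one allocation per row: commonReadByteArrays sums the batch's lengths, reads the whole batch into a single byte[], and each row then receives an (offset, length) reference into that shared buffer via BytesColumnVector.setRef. A JDK-only sketch of the same batching idea (illustrative, not the Hive code):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

/**
 * Illustrative sketch only: one contiguous buffer per batch, per-row slices
 * instead of per-row copies.
 */
public class ByteBatchSketch {

  public static void main(String[] args) throws IOException {
    // As if LENGTH said 3, 3, 4 and DATA carried the concatenated bytes.
    int[] lengths = {3, 3, 4};
    InputStream data = new ByteArrayInputStream("foobarquux".getBytes(StandardCharsets.UTF_8));

    int totalLength = 0;
    for (int len : lengths) {
      totalLength += len;
    }

    // One read loop for the whole batch (streams may return short reads).
    byte[] allBytes = new byte[totalLength];
    int offset = 0;
    int remaining = totalLength;
    while (remaining > 0) {
      int n = data.read(allBytes, offset, remaining);
      if (n < 0) {
        throw new IOException("Can't finish byte read");
      }
      offset += n;
      remaining -= n;
    }

    // Each row is just a slice reference into the shared buffer.
    int start = 0;
    for (int i = 0; i < lengths.length; i++) {
      System.out.println("row " + i + " -> "
          + new String(allBytes, start, lengths[i], StandardCharsets.UTF_8));
      start += lengths[i];
    }
  }
}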
BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); + return result; + } + + @Override + void skipRows(long items) throws IOException { + items = countNonNulls(items); + long lengthToSkip = 0; + for (int i = 0; i < items; ++i) { + lengthToSkip += lengths.next(); + } + + while (lengthToSkip > 0) { + lengthToSkip -= stream.skip(lengthToSkip); + } + } + } + + /** + * A reader for string columns that are dictionary encoded in the current + * stripe. + */ + protected static class StringDictionaryTreeReader extends TreeReader { + private DynamicByteArray dictionaryBuffer; + private int[] dictionaryOffsets; + protected IntegerReader reader; + + private byte[] dictionaryBufferInBytesCache = null; + private final LongColumnVector scratchlcv; + + StringDictionaryTreeReader(int columnId) throws IOException { + this(columnId, null, null, null, null, null); + } + + StringDictionaryTreeReader(int columnId, InStream present, InStream data, + InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding) + throws IOException { + super(columnId, present); + scratchlcv = new LongColumnVector(); + if (data != null && encoding != null) { + this.reader = createIntegerReader(encoding.getKind(), data, false, false); + } + + if (dictionary != null && encoding != null) { + readDictionaryStream(dictionary); + } + + if (length != null && encoding != null) { + readDictionaryLengthStream(length, encoding); + } + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY && + encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) { + throw new IOException("Unknown encoding " + encoding + " in column " + + columnId); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + + // read the dictionary blob + StreamName name = new StreamName(columnId, + OrcProto.Stream.Kind.DICTIONARY_DATA); + InStream in = streams.get(name); + readDictionaryStream(in); + + // read the lengths + name = new StreamName(columnId, OrcProto.Stream.Kind.LENGTH); + in = streams.get(name); + readDictionaryLengthStream(in, stripeFooter.getColumnsList().get(columnId)); + + // set up the row reader + name = new StreamName(columnId, OrcProto.Stream.Kind.DATA); + reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(name), false, false); + } + + private void readDictionaryLengthStream(InStream in, OrcProto.ColumnEncoding encoding) + throws IOException { + int dictionarySize = encoding.getDictionarySize(); + if (in != null) { // Guard against empty LENGTH stream. + IntegerReader lenReader = createIntegerReader(encoding.getKind(), in, false, false); + int offset = 0; + if (dictionaryOffsets == null || + dictionaryOffsets.length < dictionarySize + 1) { + dictionaryOffsets = new int[dictionarySize + 1]; + } + for (int i = 0; i < dictionarySize; ++i) { + dictionaryOffsets[i] = offset; + offset += (int) lenReader.next(); + } + dictionaryOffsets[dictionarySize] = offset; + in.close(); + } + + } + + private void readDictionaryStream(InStream in) throws IOException { + if (in != null) { // Guard against empty dictionary stream. + if (in.available() > 0) { + dictionaryBuffer = new DynamicByteArray(64, in.available()); + dictionaryBuffer.readAll(in); + // Since its start of strip invalidate the cache. 
+ dictionaryBufferInBytesCache = null; + } + in.close(); + } else { + dictionaryBuffer = null; + } + } + + @Override + void seek(PositionProvider[] index) throws IOException { + seek(index[columnId]); + } + + @Override + public void seek(PositionProvider index) throws IOException { + super.seek(index); + reader.seek(index); + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + Text result = null; + if (valuePresent) { + int entry = (int) reader.next(); + if (previous == null) { + result = new Text(); + } else { + result = (Text) previous; + } + int offset = dictionaryOffsets[entry]; + int length = getDictionaryEntryLength(entry, offset); + // If the column is just empty strings, the size will be zero, + // so the buffer will be null, in that case just return result + // as it will default to empty + if (dictionaryBuffer != null) { + dictionaryBuffer.setText(result, offset, length); + } else { + result.clear(); + } + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final BytesColumnVector result; + int offset; + int length; + if (previousVector == null) { + result = new BytesColumnVector(); + } else { + result = (BytesColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + if (dictionaryBuffer != null) { + + // Load dictionaryBuffer into cache. + if (dictionaryBufferInBytesCache == null) { + dictionaryBufferInBytesCache = dictionaryBuffer.get(); + } + + // Read string offsets + scratchlcv.isNull = result.isNull; + reader.nextVector(scratchlcv, batchSize); + if (!scratchlcv.isRepeating) { + + // The vector has non-repeating strings. Iterate thru the batch + // and set strings one by one + for (int i = 0; i < batchSize; i++) { + if (!scratchlcv.isNull[i]) { + offset = dictionaryOffsets[(int) scratchlcv.vector[i]]; + length = getDictionaryEntryLength((int) scratchlcv.vector[i], offset); + result.setRef(i, dictionaryBufferInBytesCache, offset, length); + } else { + // If the value is null then set offset and length to zero (null string) + result.setRef(i, dictionaryBufferInBytesCache, 0, 0); + } + } + } else { + // If the value is repeating then just set the first value in the + // vector and set the isRepeating flag to true. No need to iterate thru and + // set all the elements to the same value + offset = dictionaryOffsets[(int) scratchlcv.vector[0]]; + length = getDictionaryEntryLength((int) scratchlcv.vector[0], offset); + result.setRef(0, dictionaryBufferInBytesCache, offset, length); + } + result.isRepeating = scratchlcv.isRepeating; + } else { + // Entire stripe contains null strings. + result.isRepeating = true; + result.noNulls = false; + result.isNull[0] = true; + result.setRef(0, "".getBytes(), 0, 0); + } + return result; + } + + int getDictionaryEntryLength(int entry, int offset) { + final int length; + // if it isn't the last entry, subtract the offsets otherwise use + // the buffer length. 
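The dictionary path above keeps the dictionary blob in one buffer, turns the LENGTH stream into a prefix-sum offsets array, and resolves each row's index from the DATA stream into an (offset, next-offset minus offset) slice, which is what getDictionaryEntryLength computes. A self-contained sketch of that lookup, with plain arrays standing in for the ORC streams:

import java.nio.charset.StandardCharsets;

/**
 * Illustrative sketch only: LENGTH stream -> prefix-sum offsets, then
 * row index -> slice of the dictionary blob.
 */
public class DictionaryLookupSketch {

  public static void main(String[] args) {
    // As if DICTIONARY_DATA were the concatenated entries and LENGTH their sizes.
    byte[] dictionaryBlob = "applebananacherry".getBytes(StandardCharsets.UTF_8);
    int[] entryLengths = {5, 6, 6};                 // "apple", "banana", "cherry"

    // Build offsets: offsets[i] is where entry i starts, offsets[size] closes the blob.
    int[] offsets = new int[entryLengths.length + 1];
    int offset = 0;
    for (int i = 0; i < entryLengths.length; i++) {
      offsets[i] = offset;
      offset += entryLengths[i];
    }
    offsets[entryLengths.length] = offset;

    // As if the row-level DATA stream held these dictionary indexes.
    int[] rowIndexes = {2, 0, 0, 1};
    for (int row = 0; row < rowIndexes.length; row++) {
      int entry = rowIndexes[row];
      int start = offsets[entry];
      int length = offsets[entry + 1] - start;      // getDictionaryEntryLength
      System.out.println("row " + row + " -> "
          + new String(dictionaryBlob, start, length, StandardCharsets.UTF_8));
    }
  }
}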
+ if (entry < dictionaryOffsets.length - 1) { + length = dictionaryOffsets[entry + 1] - offset; + } else { + length = dictionaryBuffer.size() - offset; + } + return length; + } + + @Override + void skipRows(long items) throws IOException { + reader.skip(countNonNulls(items)); + } + } + + protected static class CharTreeReader extends StringTreeReader { + int maxLength; + + CharTreeReader(int columnId, int maxLength) throws IOException { + this(columnId, maxLength, null, null, null, null, null); + } + + CharTreeReader(int columnId, int maxLength, InStream present, InStream data, + InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException { + super(columnId, present, data, length, dictionary, encoding); + this.maxLength = maxLength; + } + + @Override + Object next(Object previous) throws IOException { + final HiveCharWritable result; + if (previous == null) { + result = new HiveCharWritable(); + } else { + result = (HiveCharWritable) previous; + } + // Use the string reader implementation to populate the internal Text value + Object textVal = super.next(result.getTextValue()); + if (textVal == null) { + return null; + } + // result should now hold the value that was read in. + // enforce char length + result.enforceMaxLength(maxLength); + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + // Get the vector of strings from StringTreeReader, then make a 2nd pass to + // adjust down the length (right trim and truncate) if necessary. + BytesColumnVector result = (BytesColumnVector) super.nextVector(previousVector, batchSize); + + int adjustedDownLen; + if (result.isRepeating) { + if (result.noNulls || !result.isNull[0]) { + adjustedDownLen = StringExpr + .rightTrimAndTruncate(result.vector[0], result.start[0], result.length[0], maxLength); + if (adjustedDownLen < result.length[0]) { + result.setRef(0, result.vector[0], result.start[0], adjustedDownLen); + } + } + } else { + if (result.noNulls) { + for (int i = 0; i < batchSize; i++) { + adjustedDownLen = StringExpr + .rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i], + maxLength); + if (adjustedDownLen < result.length[i]) { + result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); + } + } + } else { + for (int i = 0; i < batchSize; i++) { + if (!result.isNull[i]) { + adjustedDownLen = StringExpr + .rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i], + maxLength); + if (adjustedDownLen < result.length[i]) { + result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); + } + } + } + } + } + return result; + } + } + + protected static class VarcharTreeReader extends StringTreeReader { + int maxLength; + + VarcharTreeReader(int columnId, int maxLength) throws IOException { + this(columnId, maxLength, null, null, null, null, null); + } + + VarcharTreeReader(int columnId, int maxLength, InStream present, InStream data, + InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException { + super(columnId, present, data, length, dictionary, encoding); + this.maxLength = maxLength; + } + + @Override + Object next(Object previous) throws IOException { + final HiveVarcharWritable result; + if (previous == null) { + result = new HiveVarcharWritable(); + } else { + result = (HiveVarcharWritable) previous; + } + // Use the string reader implementation to populate the internal Text value + Object textVal = super.next(result.getTextValue()); + if (textVal == null) { 
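CharTreeReader and VarcharTreeReader reuse the string readers and only post-process the result: char right-trims trailing spaces and truncates to maxLength, varchar only truncates. The sketch below is a String-level model of those semantics; the real code operates in place on UTF-8 bytes through StringExpr.rightTrimAndTruncate/truncate and the Writable enforceMaxLength methods.

/**
 * Illustrative sketch only: simplified CHAR(n)/VARCHAR(n) enforcement.
 */
public class CharVarcharSketch {

  /** CHAR(n): drop trailing spaces, then cut to at most n characters. */
  static String enforceChar(String value, int maxLength) {
    int end = value.length();
    while (end > 0 && value.charAt(end - 1) == ' ') {
      end--;
    }
    return value.substring(0, Math.min(end, maxLength));
  }

  /** VARCHAR(n): only cut to at most n characters, trailing spaces are kept. */
  static String enforceVarchar(String value, int maxLength) {
    return value.substring(0, Math.min(value.length(), maxLength));
  }

  public static void main(String[] args) {
    String raw = "hive      ";     // "hive" followed by six spaces, as read by the string reader
    System.out.println("[" + enforceChar(raw, 8) + "]");     // prints [hive]
    System.out.println("[" + enforceVarchar(raw, 8) + "]");  // prints "hive" plus four spaces inside the brackets
  }
}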
+ return null; + } + // result should now hold the value that was read in. + // enforce varchar length + result.enforceMaxLength(maxLength); + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + // Get the vector of strings from StringTreeReader, then make a 2nd pass to + // adjust down the length (truncate) if necessary. + BytesColumnVector result = (BytesColumnVector) super.nextVector(previousVector, batchSize); + + int adjustedDownLen; + if (result.isRepeating) { + if (result.noNulls || !result.isNull[0]) { + adjustedDownLen = StringExpr + .truncate(result.vector[0], result.start[0], result.length[0], maxLength); + if (adjustedDownLen < result.length[0]) { + result.setRef(0, result.vector[0], result.start[0], adjustedDownLen); + } + } + } else { + if (result.noNulls) { + for (int i = 0; i < batchSize; i++) { + adjustedDownLen = StringExpr + .truncate(result.vector[i], result.start[i], result.length[i], maxLength); + if (adjustedDownLen < result.length[i]) { + result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); + } + } + } else { + for (int i = 0; i < batchSize; i++) { + if (!result.isNull[i]) { + adjustedDownLen = StringExpr + .truncate(result.vector[i], result.start[i], result.length[i], maxLength); + if (adjustedDownLen < result.length[i]) { + result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); + } + } + } + } + } + return result; + } + } + + protected static class StructTreeReader extends TreeReader { + protected final TreeReader[] fields; + private final String[] fieldNames; + + StructTreeReader(int columnId, + List types, + boolean[] included, + boolean skipCorrupt) throws IOException { + super(columnId); + OrcProto.Type type = types.get(columnId); + int fieldCount = type.getFieldNamesCount(); + this.fields = new TreeReader[fieldCount]; + this.fieldNames = new String[fieldCount]; + for (int i = 0; i < fieldCount; ++i) { + int subtype = type.getSubtypes(i); + if (included == null || included[subtype]) { + this.fields[i] = createTreeReader(subtype, types, included, skipCorrupt); + } + this.fieldNames[i] = type.getFieldNames(i); + } + } + + @Override + void seek(PositionProvider[] index) throws IOException { + super.seek(index); + for (TreeReader kid : fields) { + if (kid != null) { + kid.seek(index); + } + } + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + OrcStruct result = null; + if (valuePresent) { + if (previous == null) { + result = new OrcStruct(fields.length); + } else { + result = (OrcStruct) previous; + + // If the input format was initialized with a file with a + // different number of fields, the number of fields needs to + // be updated to the correct number + if (result.getNumFields() != fields.length) { + result.setNumFields(fields.length); + } + } + for (int i = 0; i < fields.length; ++i) { + if (fields[i] != null) { + result.setFieldValue(i, fields[i].next(result.getFieldValue(i))); + } + } + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + final ColumnVector[] result; + if (previousVector == null) { + result = new ColumnVector[fields.length]; + } else { + result = (ColumnVector[]) previousVector; + } + + // Read all the members of struct as column vectors + for (int i = 0; i < fields.length; i++) { + if (fields[i] != null) { + if (result[i] == null) { + result[i] = (ColumnVector) fields[i].nextVector(null, batchSize); + } else { + 
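StructTreeReader only instantiates child readers for columns marked in included[], leaving the rest null so that next() simply produces null for pruned fields. The sketch below models that pruning (indexing included by field position for brevity, whereas the real code indexes it by the subtype's column id):

import java.util.Arrays;

/**
 * Illustrative sketch only: child readers exist only for projected fields.
 */
public class StructPruningSketch {

  interface FieldReader {
    Object next();
  }

  private final FieldReader[] fields;

  StructPruningSketch(String[] fieldNames, boolean[] included) {
    fields = new FieldReader[fieldNames.length];
    for (int i = 0; i < fieldNames.length; i++) {
      if (included == null || included[i]) {
        final String name = fieldNames[i];
        fields[i] = () -> "value-of-" + name;   // stands in for createTreeReader(subtype, ...)
      }                                          // else: pruned column, reader stays null
    }
  }

  Object[] next() {
    Object[] row = new Object[fields.length];
    for (int i = 0; i < fields.length; i++) {
      if (fields[i] != null) {
        row[i] = fields[i].next();
      }
    }
    return row;
  }

  public static void main(String[] args) {
    StructPruningSketch reader = new StructPruningSketch(
        new String[]{"id", "name", "payload"},
        new boolean[]{true, false, true});       // "name" is not projected
    System.out.println(Arrays.toString(reader.next()));
    // [value-of-id, null, value-of-payload]
  }
}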
fields[i].nextVector(result[i], batchSize); + } + } + } + return result; + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + for (TreeReader field : fields) { + if (field != null) { + field.startStripe(streams, stripeFooter); + } + } + } + + @Override + void skipRows(long items) throws IOException { + items = countNonNulls(items); + for (TreeReader field : fields) { + if (field != null) { + field.skipRows(items); + } + } + } + } + + protected static class UnionTreeReader extends TreeReader { + protected final TreeReader[] fields; + protected RunLengthByteReader tags; + + UnionTreeReader(int columnId, + List types, + boolean[] included, + boolean skipCorrupt) throws IOException { + super(columnId); + OrcProto.Type type = types.get(columnId); + int fieldCount = type.getSubtypesCount(); + this.fields = new TreeReader[fieldCount]; + for (int i = 0; i < fieldCount; ++i) { + int subtype = type.getSubtypes(i); + if (included == null || included[subtype]) { + this.fields[i] = createTreeReader(subtype, types, included, skipCorrupt); + } + } + } + + @Override + void seek(PositionProvider[] index) throws IOException { + super.seek(index); + tags.seek(index[columnId]); + for (TreeReader kid : fields) { + kid.seek(index); + } + } + + @Override + Object next(Object previous) throws IOException { + super.next(previous); + OrcUnion result = null; + if (valuePresent) { + if (previous == null) { + result = new OrcUnion(); + } else { + result = (OrcUnion) previous; + } + byte tag = tags.next(); + Object previousVal = result.getObject(); + result.set(tag, fields[tag].next(tag == result.getTag() ? + previousVal : null)); + } + return result; + } + + @Override + public Object nextVector(Object previousVector, long batchSize) throws IOException { + throw new UnsupportedOperationException( + "NextVector is not supported operation for Union type"); + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + tags = new RunLengthByteReader(streams.get(new StreamName(columnId, + OrcProto.Stream.Kind.DATA))); + for (TreeReader field : fields) { + if (field != null) { + field.startStripe(streams, stripeFooter); + } + } + } + + @Override + void skipRows(long items) throws IOException { + items = countNonNulls(items); + long[] counts = new long[fields.length]; + for (int i = 0; i < items; ++i) { + counts[tags.next()] += 1; + } + for (int i = 0; i < counts.length; ++i) { + fields[i].skipRows(counts[i]); + } + } + } + + protected static class ListTreeReader extends TreeReader { + protected final TreeReader elementReader; + protected IntegerReader lengths = null; + + ListTreeReader(int columnId, + List types, + boolean[] included, + boolean skipCorrupt) throws IOException { + super(columnId); + OrcProto.Type type = types.get(columnId); + elementReader = createTreeReader(type.getSubtypes(0), types, included, skipCorrupt); + } + + @Override + void seek(PositionProvider[] index) throws IOException { + super.seek(index); + lengths.seek(index[columnId]); + elementReader.seek(index); + } + + @Override + @SuppressWarnings("unchecked") + Object next(Object previous) throws IOException { + super.next(previous); + List result = null; + if (valuePresent) { + if (previous == null) { + result = new ArrayList<>(); + } else { + result = (ArrayList) previous; + } + int prevLength = result.size(); + int length = (int) lengths.next(); + // 
extend the list to the new length + for (int i = prevLength; i < length; ++i) { + result.add(null); + } + // read the new elements into the array + for (int i = 0; i < length; i++) { + result.set(i, elementReader.next(i < prevLength ? + result.get(i) : null)); + } + // remove any extra elements + for (int i = prevLength - 1; i >= length; --i) { + result.remove(i); + } + } + return result; + } + + @Override + public Object nextVector(Object previous, long batchSize) throws IOException { + throw new UnsupportedOperationException( + "NextVector is not supported operation for List type"); + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && + (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { + throw new IOException("Unknown encoding " + encoding + " in column " + + columnId); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(new StreamName(columnId, + OrcProto.Stream.Kind.LENGTH)), false, false); + if (elementReader != null) { + elementReader.startStripe(streams, stripeFooter); + } + } + + @Override + void skipRows(long items) throws IOException { + items = countNonNulls(items); + long childSkip = 0; + for (long i = 0; i < items; ++i) { + childSkip += lengths.next(); + } + elementReader.skipRows(childSkip); + } + } + + protected static class MapTreeReader extends TreeReader { + protected final TreeReader keyReader; + protected final TreeReader valueReader; + protected IntegerReader lengths = null; + + MapTreeReader(int columnId, + List types, + boolean[] included, + boolean skipCorrupt) throws IOException { + super(columnId); + OrcProto.Type type = types.get(columnId); + int keyColumn = type.getSubtypes(0); + int valueColumn = type.getSubtypes(1); + if (included == null || included[keyColumn]) { + keyReader = createTreeReader(keyColumn, types, included, skipCorrupt); + } else { + keyReader = null; + } + if (included == null || included[valueColumn]) { + valueReader = createTreeReader(valueColumn, types, included, skipCorrupt); + } else { + valueReader = null; + } + } + + @Override + void seek(PositionProvider[] index) throws IOException { + super.seek(index); + lengths.seek(index[columnId]); + keyReader.seek(index); + valueReader.seek(index); + } + + @Override + @SuppressWarnings("unchecked") + Object next(Object previous) throws IOException { + super.next(previous); + Map result = null; + if (valuePresent) { + if (previous == null) { + result = new LinkedHashMap<>(); + } else { + result = (LinkedHashMap) previous; + } + // for now just clear and create new objects + result.clear(); + int length = (int) lengths.next(); + // read the new elements into the array + for (int i = 0; i < length; i++) { + result.put(keyReader.next(null), valueReader.next(null)); + } + } + return result; + } + + @Override + public Object nextVector(Object previous, long batchSize) throws IOException { + throw new UnsupportedOperationException( + "NextVector is not supported operation for Map type"); + } + + @Override + void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { + if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) && + (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) { + throw new IOException("Unknown encoding " + encoding + " in 
column " + + columnId); + } + } + + @Override + void startStripe(Map streams, + OrcProto.StripeFooter stripeFooter + ) throws IOException { + super.startStripe(streams, stripeFooter); + lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(), + streams.get(new StreamName(columnId, + OrcProto.Stream.Kind.LENGTH)), false, false); + if (keyReader != null) { + keyReader.startStripe(streams, stripeFooter); + } + if (valueReader != null) { + valueReader.startStripe(streams, stripeFooter); + } + } + + @Override + void skipRows(long items) throws IOException { + items = countNonNulls(items); + long childSkip = 0; + for (long i = 0; i < items; ++i) { + childSkip += lengths.next(); + } + keyReader.skipRows(childSkip); + valueReader.skipRows(childSkip); + } + } + + public static TreeReader createTreeReader(int columnId, + List types, + boolean[] included, + boolean skipCorrupt + ) throws IOException { + OrcProto.Type type = types.get(columnId); + switch (type.getKind()) { + case BOOLEAN: + return new BooleanTreeReader(columnId); + case BYTE: + return new ByteTreeReader(columnId); + case DOUBLE: + return new DoubleTreeReader(columnId); + case FLOAT: + return new FloatTreeReader(columnId); + case SHORT: + return new ShortTreeReader(columnId); + case INT: + return new IntTreeReader(columnId); + case LONG: + return new LongTreeReader(columnId, skipCorrupt); + case STRING: + return new StringTreeReader(columnId); + case CHAR: + if (!type.hasMaximumLength()) { + throw new IllegalArgumentException("ORC char type has no length specified"); + } + return new CharTreeReader(columnId, type.getMaximumLength()); + case VARCHAR: + if (!type.hasMaximumLength()) { + throw new IllegalArgumentException("ORC varchar type has no length specified"); + } + return new VarcharTreeReader(columnId, type.getMaximumLength()); + case BINARY: + return new BinaryTreeReader(columnId); + case TIMESTAMP: + return new TimestampTreeReader(columnId, skipCorrupt); + case DATE: + return new DateTreeReader(columnId); + case DECIMAL: + int precision = + type.hasPrecision() ? type.getPrecision() : HiveDecimal.SYSTEM_DEFAULT_PRECISION; + int scale = type.hasScale() ? 
type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE; + return new DecimalTreeReader(columnId, precision, scale); + case STRUCT: + return new StructTreeReader(columnId, types, included, skipCorrupt); + case LIST: + return new ListTreeReader(columnId, types, included, skipCorrupt); + case MAP: + return new MapTreeReader(columnId, types, included, skipCorrupt); + case UNION: + return new UnionTreeReader(columnId, types, included, skipCorrupt); + default: + throw new IllegalArgumentException("Unsupported type " + + type.getKind()); + } + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java (working copy) @@ -109,7 +109,9 @@ @Override public boolean next(NullWritable key, VectorizedRowBatch outputBatch) throws IOException { - assert(outputBatch.numCols == assigners.length); + if (assigners != null) { + assert(outputBatch.numCols == assigners.length); + } outputBatch.reset(); int maxSize = outputBatch.getMaxSize(); try { Index: ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java (working copy) @@ -18,7 +18,6 @@ import parquet.io.api.GroupConverter; import parquet.io.api.RecordMaterializer; import parquet.schema.GroupType; -import parquet.schema.MessageType; import parquet.schema.MessageTypeParser; import java.util.Map; @@ -34,7 +33,7 @@ public DataWritableRecordConverter(final GroupType requestedSchema, final Map metadata) { this.root = new HiveStructConverter(requestedSchema, - MessageTypeParser.parseMessageType(metadata.get(DataWritableReadSupport.HIVE_SCHEMA_KEY)), metadata); + MessageTypeParser.parseMessageType(metadata.get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)), metadata); } @Override Index: ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (working copy) @@ -16,6 +16,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.ListIterator; import java.util.Map; import org.apache.hadoop.conf.Configuration; @@ -24,17 +25,21 @@ import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.util.StringUtils; -import parquet.column.ColumnDescriptor; +import parquet.hadoop.api.InitContext; import parquet.hadoop.api.ReadSupport; import parquet.io.api.RecordMaterializer; +import parquet.schema.GroupType; 
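createTreeReader above is the single factory that maps an ORC type kind to its reader, filling in HiveDecimal's default precision/scale when the file does not record them and insisting on a maximum length for char/varchar. A minimal sketch of that shape, with a hypothetical enum and string results standing in for OrcProto.Type and the TreeReader subclasses (the 38/18 defaults are assumed values for SYSTEM_DEFAULT_PRECISION/SCALE):

/**
 * Illustrative sketch only: factory switch with defaults and validation.
 */
public class TreeReaderFactorySketch {

  enum Kind { INT, STRING, CHAR, DECIMAL }

  static String createTreeReader(Kind kind, Integer maxLength, Integer precision, Integer scale) {
    switch (kind) {
      case INT:
        return "IntTreeReader";
      case STRING:
        return "StringTreeReader";
      case CHAR:
        if (maxLength == null) {
          throw new IllegalArgumentException("ORC char type has no length specified");
        }
        return "CharTreeReader(" + maxLength + ")";
      case DECIMAL:
        int p = (precision != null) ? precision : 38;  // assumed stand-in for SYSTEM_DEFAULT_PRECISION
        int s = (scale != null) ? scale : 18;          // assumed stand-in for SYSTEM_DEFAULT_SCALE
        return "DecimalTreeReader(" + p + ", " + s + ")";
      default:
        throw new IllegalArgumentException("Unsupported type " + kind);
    }
  }

  public static void main(String[] args) {
    System.out.println(createTreeReader(Kind.DECIMAL, null, null, null)); // DecimalTreeReader(38, 18)
    System.out.println(createTreeReader(Kind.CHAR, 10, null, null));      // CharTreeReader(10)
  }
}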
import parquet.schema.MessageType; -import parquet.schema.PrimitiveType; +import parquet.schema.Type; +import parquet.schema.Types; import parquet.schema.PrimitiveType.PrimitiveTypeName; -import parquet.schema.Type; -import parquet.schema.Type.Repetition; /** * @@ -45,8 +50,7 @@ */ public class DataWritableReadSupport extends ReadSupport { - private static final String TABLE_SCHEMA = "table_schema"; - public static final String HIVE_SCHEMA_KEY = "HIVE_TABLE_SCHEMA"; + public static final String HIVE_TABLE_AS_PARQUET_SCHEMA = "HIVE_TABLE_SCHEMA"; public static final String PARQUET_COLUMN_INDEX_ACCESS = "parquet.column.index.access"; /** @@ -56,80 +60,176 @@ * @param columns comma separated list of columns * @return list with virtual columns removed */ - private static List getColumns(final String columns) { + private static List getColumnNames(final String columns) { return (List) VirtualColumn. removeVirtualColumns(StringUtils.getStringCollection(columns)); } /** + * Returns a list of TypeInfo objects from a string which contains column + * types strings. * - * It creates the readContext for Parquet side with the requested schema during the init phase. + * @param types Comma separated list of types + * @return A list of TypeInfo objects. + */ + private static List getColumnTypes(final String types) { + return TypeInfoUtils.getTypeInfosFromTypeString(types); + } + + /** + * Searchs for a fieldName into a parquet GroupType by ignoring string case. + * GroupType#getType(String fieldName) is case sensitive, so we use this method. * - * @param configuration needed to get the wanted columns - * @param keyValueMetaData // unused - * @param fileSchema parquet file schema - * @return the parquet ReadContext + * @param groupType Group of field types where to search for fieldName + * @param fieldName The field what we are searching + * @return The Type object of the field found; null otherwise. */ - @Override - public parquet.hadoop.api.ReadSupport.ReadContext init(final Configuration configuration, - final Map keyValueMetaData, final MessageType fileSchema) { - final String columns = configuration.get(IOConstants.COLUMNS); - final Map contextMetadata = new HashMap(); - final boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false); - if (columns != null) { - final List listColumns = getColumns(columns); - final Map lowerCaseFileSchemaColumns = new HashMap(); - for (ColumnDescriptor c : fileSchema.getColumns()) { - lowerCaseFileSchemaColumns.put(c.getPath()[0].toLowerCase(), c.getPath()[0]); + private static Type getFieldTypeIgnoreCase(GroupType groupType, String fieldName) { + for (Type type : groupType.getFields()) { + if (type.getName().equalsIgnoreCase(fieldName)) { + return type; } - final List typeListTable = new ArrayList(); - if(indexAccess) { - for (int index = 0; index < listColumns.size(); index++) { - //Take columns based on index or pad the field - if(index < fileSchema.getFieldCount()) { - typeListTable.add(fileSchema.getType(index)); - } else { - //prefixing with '_mask_' to ensure no conflict with named - //columns in the file schema - typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "_mask_"+listColumns.get(index))); + } + + return null; + } + + /** + * Searchs column names by name on a given Parquet schema, and returns its corresponded + * Parquet schema types. + * + * @param schema Group schema where to search for column names. + * @param colNames List of column names. + * @param colTypes List of column types. 
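The rewritten DataWritableReadSupport resolves requested Hive columns against the Parquet file schema by name and case-insensitively, since GroupType#getType is case sensitive while Hive lower-cases column names, and it synthesizes an optional placeholder for columns the file predates. A JDK-only sketch of that matching, with a plain map standing in for the Parquet schema types (illustrative names, not the Parquet API):

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Illustrative sketch only: case-insensitive column resolution with a
 * schema-evolution fallback.
 */
public class CaseInsensitiveProjectionSketch {

  /** Stand-in for getFieldTypeIgnoreCase: Hive names are lower case, file names may not be. */
  static String findFieldIgnoreCase(Map<String, String> fileSchema, String colName) {
    for (Map.Entry<String, String> field : fileSchema.entrySet()) {
      if (field.getKey().equalsIgnoreCase(colName)) {
        return field.getKey() + ":" + field.getValue();
      }
    }
    return null;
  }

  public static void main(String[] args) {
    Map<String, String> fileSchema = new LinkedHashMap<>();
    fileSchema.put("UserId", "int64");       // file written with mixed-case names
    fileSchema.put("UserName", "binary");

    List<String> requested = Arrays.asList("userid", "username", "added_later");
    for (String col : requested) {
      String match = findFieldIgnoreCase(fileSchema, col);
      if (match != null) {
        System.out.println(col + " -> " + match);
      } else {
        // Column missing from this (older) file: project an optional placeholder
        // so its values simply come back null, as the patch does with Types.optional(BINARY).
        System.out.println(col + " -> optional binary " + col + " (placeholder)");
      }
    }
  }
}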
+ * @return List of GroupType objects of projected columns. + */ + private static List getProjectedGroupFields(GroupType schema, List colNames, List colTypes) { + List schemaTypes = new ArrayList(); + + ListIterator columnIterator = colNames.listIterator(); + while (columnIterator.hasNext()) { + TypeInfo colType = colTypes.get(columnIterator.nextIndex()); + String colName = (String) columnIterator.next(); + + Type fieldType = getFieldTypeIgnoreCase(schema, colName); + if (fieldType != null) { + if (colType.getCategory() == ObjectInspector.Category.STRUCT) { + if (fieldType.isPrimitive()) { + throw new IllegalStateException("Invalid schema data type, found: PRIMITIVE, expected: STRUCT"); } + + GroupType groupFieldType = fieldType.asGroupType(); + + List groupFields = getProjectedGroupFields( + groupFieldType, + ((StructTypeInfo) colType).getAllStructFieldNames(), + ((StructTypeInfo) colType).getAllStructFieldTypeInfos() + ); + + Type[] typesArray = groupFields.toArray(new Type[0]); + schemaTypes.add(Types.buildGroup(groupFieldType.getRepetition()) + .addFields(typesArray) + .named(fieldType.getName()) + ); + } else { + schemaTypes.add(fieldType); } } else { - for (String col : listColumns) { - col = col.toLowerCase(); - // listColumns contains partition columns which are metadata only - if (lowerCaseFileSchemaColumns.containsKey(col)) { - typeListTable.add(fileSchema.getType(lowerCaseFileSchemaColumns.get(col))); - } else { - // below allows schema evolution - typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col)); - } + // Add type for schema evolution + schemaTypes.add(Types.optional(PrimitiveTypeName.BINARY).named(colName)); + } + } + + return schemaTypes; + } + + /** + * Searchs column names by name on a given Parquet message schema, and returns its projected + * Parquet schema types. + * + * @param schema Message type schema where to search for column names. + * @param colNames List of column names. + * @param colTypes List of column types. + * @return A MessageType object of projected columns. + */ + private static MessageType getSchemaByName(MessageType schema, List colNames, List colTypes) { + List projectedFields = getProjectedGroupFields(schema, colNames, colTypes); + Type[] typesArray = projectedFields.toArray(new Type[0]); + + return Types.buildMessage() + .addFields(typesArray) + .named(schema.getName()); + } + + /** + * Searchs column names by index on a given Parquet file schema, and returns its corresponded + * Parquet schema types. + * + * @param schema Message schema where to search for column names. + * @param colNames List of column names. + * @param colIndexes List of column indexes. + * @return A MessageType object of the column names found. 
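When parquet.column.index.access is enabled, getSchemaByIndex instead matches columns by position and pads any requested index beyond the file schema with a "_mask_"-prefixed optional binary so it cannot collide with a real field name. A small sketch of that index-based projection, using plain strings in place of Parquet Type objects:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Illustrative sketch only: positional projection with "_mask_" padding.
 */
public class IndexProjectionSketch {

  static List<String> projectByIndex(List<String> fileFields, List<String> hiveColumns,
                                     List<Integer> wantedIndexes) {
    List<String> projected = new ArrayList<>();
    for (Integer i : wantedIndexes) {
      if (i < hiveColumns.size()) {
        if (i < fileFields.size()) {
          projected.add(fileFields.get(i));                              // real field, by position
        } else {
          projected.add("optional binary _mask_" + hiveColumns.get(i));  // padded field
        }
      }
    }
    return projected;
  }

  public static void main(String[] args) {
    List<String> fileFields = Arrays.asList("required int64 a", "optional binary b");
    List<String> hiveColumns = Arrays.asList("a", "b", "c");             // table has one extra column
    System.out.println(projectByIndex(fileFields, hiveColumns, Arrays.asList(0, 2)));
    // [required int64 a, optional binary _mask_c]
  }
}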
+ */ + private static MessageType getSchemaByIndex(MessageType schema, List colNames, List colIndexes) { + List schemaTypes = new ArrayList(); + + for (Integer i : colIndexes) { + if (i < colNames.size()) { + if (i < schema.getFieldCount()) { + schemaTypes.add(schema.getType(i)); + } else { + //prefixing with '_mask_' to ensure no conflict with named + //columns in the file schema + schemaTypes.add(Types.optional(PrimitiveTypeName.BINARY).named("_mask_" + colNames.get(i))); } } - MessageType tableSchema = new MessageType(TABLE_SCHEMA, typeListTable); - contextMetadata.put(HIVE_SCHEMA_KEY, tableSchema.toString()); + } - final List indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration); + return new MessageType(schema.getName(), schemaTypes); + } - final List typeListWanted = new ArrayList(); + /** + * It creates the readContext for Parquet side with the requested schema during the init phase. + * + * @param context + * @return the parquet ReadContext + */ + @Override + public parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) { + Configuration configuration = context.getConfiguration(); + MessageType fileSchema = context.getFileSchema(); + String columnNames = configuration.get(IOConstants.COLUMNS); + Map contextMetadata = new HashMap(); + boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false); - for (final Integer idx : indexColumnsWanted) { - if (idx < listColumns.size()) { - String col = listColumns.get(idx); - if (indexAccess) { - typeListWanted.add(fileSchema.getFields().get(idx)); - } else { - col = col.toLowerCase(); - if (lowerCaseFileSchemaColumns.containsKey(col)) { - typeListWanted.add(tableSchema.getType(lowerCaseFileSchemaColumns.get(col))); - } - } + if (columnNames != null) { + List columnNamesList = getColumnNames(columnNames); + + MessageType tableSchema; + if (indexAccess) { + List indexSequence = new ArrayList(); + + // Generates a sequence list of indexes + for(int i = 0; i < columnNamesList.size(); i++) { + indexSequence.add(i); } + + tableSchema = getSchemaByIndex(fileSchema, columnNamesList, indexSequence); + } else { + String columnTypes = configuration.get(IOConstants.COLUMNS_TYPES); + List columnTypesList = getColumnTypes(columnTypes); + + tableSchema = getSchemaByName(fileSchema, columnNamesList, columnTypesList); } - MessageType requestedSchemaByUser = new MessageType(fileSchema.getName(), typeListWanted); + + contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, tableSchema.toString()); + + List indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration); + MessageType requestedSchemaByUser = getSchemaByIndex(tableSchema, columnNamesList, indexColumnsWanted); + return new ReadContext(requestedSchemaByUser, contextMetadata); } else { - contextMetadata.put(HIVE_SCHEMA_KEY, fileSchema.toString()); + contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, fileSchema.toString()); return new ReadContext(fileSchema, contextMetadata); } } Index: ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java (working copy) @@ -15,7 +15,12 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import 
java.util.Map; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -38,10 +43,13 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; +import parquet.filter2.compat.FilterCompat; +import parquet.filter2.compat.RowGroupFilter; import parquet.filter2.predicate.FilterPredicate; import parquet.hadoop.ParquetFileReader; import parquet.hadoop.ParquetInputFormat; import parquet.hadoop.ParquetInputSplit; +import parquet.hadoop.api.InitContext; import parquet.hadoop.api.ReadSupport.ReadContext; import parquet.hadoop.metadata.BlockMetaData; import parquet.hadoop.metadata.FileMetaData; @@ -66,6 +74,7 @@ private boolean skipTimestampConversion = false; private JobConf jobConf; private final ProjectionPusher projectionPusher; + private List filtedBlocks; public ParquetRecordReaderWrapper( final ParquetInputFormat newInputFormat, @@ -94,8 +103,6 @@ taskAttemptID = new TaskAttemptID(); } - setFilter(jobConf); - // create a TaskInputOutputContext Configuration conf = jobConf; if (skipTimestampConversion ^ HiveConf.getBoolVar( @@ -130,13 +137,13 @@ } } - public void setFilter(final JobConf conf) { + public FilterCompat.Filter setFilter(final JobConf conf) { String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR); String columnNamesString = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR); if (serializedPushdown == null || columnNamesString == null || serializedPushdown.isEmpty() || columnNamesString.isEmpty()) { - return; + return null; } FilterPredicate p = @@ -145,9 +152,11 @@ if (p != null) { LOG.debug("Predicate filter for parquet is " + p.toString()); ParquetInputFormat.setFilterPredicate(conf, p); + return FilterCompat.get(p); } else { LOG.debug("No predicate filter can be generated for " + TableScanDesc.FILTER_EXPR_CONF_STR + " with the value of " + serializedPushdown); + return null; } } @@ -238,15 +247,16 @@ if (oldSplit instanceof FileSplit) { final Path finalPath = ((FileSplit) oldSplit).getPath(); jobConf = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent()); + FilterCompat.Filter filter = setFilter(jobConf); final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath); final List blocks = parquetMetadata.getBlocks(); final FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); - final ReadContext readContext = new DataWritableReadSupport() - .init(jobConf, fileMetaData.getKeyValueMetaData(), fileMetaData.getSchema()); + final ReadContext readContext = new DataWritableReadSupport().init(new InitContext(jobConf, + null, fileMetaData.getSchema())); schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata() - .get(DataWritableReadSupport.HIVE_SCHEMA_KEY)).getFieldCount(); + .get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)).getFieldCount(); final List splitGroup = new ArrayList(); final long splitStart = ((FileSplit) oldSplit).getStart(); final long splitLength = ((FileSplit) oldSplit).getLength(); @@ -258,24 +268,43 @@ } if (splitGroup.isEmpty()) { LOG.warn("Skipping split, could not find row group in: " + (FileSplit) oldSplit); - split = null; + return null; + } + + if (filter != null) { + filtedBlocks = RowGroupFilter.filterRowGroups(filter, splitGroup, fileMetaData.getSchema()); + if (filtedBlocks.isEmpty()) { + LOG.debug("All row groups are dropped due to filter predicates"); + return null; + } + + long droppedBlocks = splitGroup.size() - filtedBlocks.size(); + if 
(droppedBlocks > 0) { + LOG.debug("Dropping " + droppedBlocks + " row groups that do not pass filter predicate"); + } } else { - if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) { - skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr"); - } - split = new ParquetInputSplit(finalPath, - splitStart, - splitLength, - ((FileSplit) oldSplit).getLocations(), - splitGroup, - readContext.getRequestedSchema().toString(), - fileMetaData.getSchema().toString(), - fileMetaData.getKeyValueMetaData(), - readContext.getReadSupportMetadata()); + filtedBlocks = splitGroup; } + + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) { + skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr"); + } + split = new ParquetInputSplit(finalPath, + splitStart, + splitLength, + ((FileSplit) oldSplit).getLocations(), + filtedBlocks, + readContext.getRequestedSchema().toString(), + fileMetaData.getSchema().toString(), + fileMetaData.getKeyValueMetaData(), + readContext.getReadSupportMetadata()); + return split; } else { throw new IllegalArgumentException("Unknown split type: " + oldSplit); } - return split; } + + public List getFiltedBlocks() { + return filtedBlocks; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java (working copy) @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.mapred.Mapper; /** @@ -88,7 +89,7 @@ /** * @return the aggKey */ - @Explain(displayName = "Stats Aggregation Key Prefix", normalExplain = false) + @Explain(displayName = "Stats Aggregation Key Prefix", explainLevels = { Level.EXTENDED }) public String getAggKey() { return aggKey; } Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java (working copy) @@ -30,9 +30,10 @@ import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.mapred.Mapper; -@Explain(displayName = "Column Truncate") +@Explain(displayName = "Column Truncate", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ColumnTruncateWork extends MapWork implements Serializable { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java (working copy) @@ -128,7 +128,7 @@ case ORC: // adapt base type to what orc needs if (literal instanceof Integer) { - return 
Long.valueOf(literal.toString()); + return ((Number) literal).longValue(); } return literal; case PARQUET: @@ -157,9 +157,9 @@ Integer)) { return literalList; } - List result = new ArrayList(); + List result = new ArrayList(literalList.size()); for (Object o : literalList) { - result.add(Long.valueOf(o.toString())); + result.add(((Number) o).longValue()); } return result; } @@ -1114,7 +1114,7 @@ } else if (literal instanceof Byte || literal instanceof Short || literal instanceof Integer) { - return Long.valueOf(literal.toString()); + return ((Number) literal).longValue(); } else if (literal instanceof Float) { // to avoid change in precision when upcasting float to double // we convert the literal to string and parse it as double. (HIVE-8460) Index: ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (working copy) @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.IdentityHashMap; import java.util.List; import java.util.Set; import java.util.Stack; @@ -37,7 +38,7 @@ protected Stack opStack; protected final List toWalk = new ArrayList(); - protected final HashMap retMap = new HashMap(); + protected final IdentityHashMap retMap = new IdentityHashMap(); protected final Dispatcher dispatcher; /** Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (working copy) @@ -132,6 +132,7 @@ * get methods in this class. 
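The DefaultGraphWalker change above swaps the HashMap used for node return values for an IdentityHashMap. A minimal JDK-only sketch of why that matters, assuming a hypothetical node type whose instances can compare equal() even though the walker must treat them as distinct graph nodes:

    import java.util.HashMap;
    import java.util.IdentityHashMap;
    import java.util.Map;

    // Hypothetical node type: two instances are equal() but are different nodes.
    class FakeNode {
      final String label;
      FakeNode(String label) { this.label = label; }
      @Override public boolean equals(Object o) {
        return o instanceof FakeNode && ((FakeNode) o).label.equals(label);
      }
      @Override public int hashCode() { return label.hashCode(); }
    }

    public class RetMapSketch {
      public static void main(String[] args) {
        FakeNode a = new FakeNode("x");
        FakeNode b = new FakeNode("x");   // equal to a, but a distinct node

        Map<FakeNode, String> byEquals = new HashMap<FakeNode, String>();
        byEquals.put(a, "result-for-a");
        byEquals.put(b, "result-for-b"); // silently overwrites a's entry
        System.out.println(byEquals.size()); // 1

        Map<FakeNode, String> byIdentity = new IdentityHashMap<FakeNode, String>();
        byIdentity.put(a, "result-for-a");
        byIdentity.put(b, "result-for-b"); // both kept
        System.out.println(byIdentity.size()); // 2
      }
    }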
*/ +@SuppressWarnings({"deprecation", "rawtypes"}) public class Hive { static final private Log LOG = LogFactory.getLog("hive.ql.metadata.Hive"); @@ -140,6 +141,9 @@ private IMetaStoreClient metaStoreClient; private UserGroupInformation owner; + // metastore calls timing information + private final Map metaCallTimeMap = new HashMap(); + private static ThreadLocal hiveDB = new ThreadLocal() { @Override protected synchronized Hive initialValue() { @@ -759,6 +763,13 @@ throws HiveException { try { + String tdname = Utilities.getDatabaseName(tableName); + String idname = Utilities.getDatabaseName(indexTblName); + if (!idname.equals(tdname)) { + throw new HiveException("Index on different database (" + idname + + ") from base table (" + tdname + ") is not supported."); + } + Index old_index = null; try { old_index = getIndex(tableName, indexName); @@ -849,9 +860,8 @@ org.apache.hadoop.hive.metastore.api.Table tt = null; HiveIndexHandler indexHandler = HiveUtils.getIndexHandler(this.getConf(), indexHandlerClass); + String itname = Utilities.getTableName(indexTblName); if (indexHandler.usesIndexTable()) { - String idname = Utilities.getDatabaseName(indexTblName); - String itname = Utilities.getTableName(indexTblName); tt = new org.apache.hadoop.hive.ql.metadata.Table(idname, itname).getTTable(); List partKeys = baseTbl.getPartitionKeys(); tt.setPartitionKeys(partKeys); @@ -876,9 +886,6 @@ throw new RuntimeException("Please specify deferred rebuild using \" WITH DEFERRED REBUILD \"."); } - String tdname = Utilities.getDatabaseName(tableName); - String ttname = Utilities.getTableName(tableName); - StorageDescriptor indexSd = new StorageDescriptor( indexTblCols, location, @@ -891,7 +898,8 @@ sortCols, null/*parameters*/); - Index indexDesc = new Index(indexName, indexHandlerClass, tdname, ttname, time, time, indexTblName, + String ttname = Utilities.getTableName(tableName); + Index indexDesc = new Index(indexName, indexHandlerClass, tdname, ttname, time, time, itname, indexSd, new HashMap(), deferredRebuild); if (indexComment != null) { indexDesc.getParameters().put("comment", indexComment); @@ -2411,7 +2419,7 @@ List> result = new ArrayList>(); try { - FileStatus destStatus = !replace && fs.exists(destf) ? fs.getFileStatus(destf) : null; + FileStatus destStatus = !replace ? 
FileUtils.getFileStatusOrNull(fs, destf) : null; if (destStatus != null && !destStatus.isDir()) { throw new HiveException("checkPaths: destination " + destf + " should be a directory"); @@ -2813,7 +2821,6 @@ List> result = checkPaths(conf, destFs, srcs, srcFs, destf, true); - HadoopShims shims = ShimLoader.getHadoopShims(); if (oldPath != null) { try { FileSystem fs2 = oldPath.getFileSystem(conf); @@ -2976,7 +2983,7 @@ } } }; - return RetryingMetaStoreClient.getProxy(conf, hookLoader, + return RetryingMetaStoreClient.getProxy(conf, hookLoader, metaCallTimeMap, SessionHiveMetaStoreClient.class.getName()); } @@ -3236,4 +3243,37 @@ throw new HiveException(te); } } + + public void clearMetaCallTiming() { + metaCallTimeMap.clear(); + } + + public void dumpAndClearMetaCallTiming(String phase) { + boolean phaseInfoLogged = false; + if (LOG.isDebugEnabled()) { + phaseInfoLogged = logDumpPhase(phase); + LOG.debug("Total time spent in each metastore function (ms): " + metaCallTimeMap); + } + + if (LOG.isInfoEnabled()) { + // print information about calls that took longer time at INFO level + for (Entry callTime : metaCallTimeMap.entrySet()) { + // dump information if call took more than 1 sec (1000ms) + if (callTime.getValue() > 1000) { + if (!phaseInfoLogged) { + phaseInfoLogged = logDumpPhase(phase); + } + LOG.info("Total time spent in this metastore function was greater than 1000ms : " + + callTime); + } + } + } + metaCallTimeMap.clear(); + } + + private boolean logDumpPhase(String phase) { + LOG.info("Dumping metastore api call timing information for : " + phase + " phase"); + return true; + } + }; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java (working copy) @@ -495,17 +495,19 @@ for (int sindex = 0; sindex < smallTblBucketNums.size(); sindex++) { int smallTblBucketNum = smallTblBucketNums.get(sindex); List smallTblFileNames = smallTblFilesList.get(sindex); - if (bigTblBucketNum >= smallTblBucketNum) { - // if the big table has more buckets than the current small table, - // use "MOD" to get small table bucket names. For example, if the big - // table has 4 buckets and the small table has 2 buckets, then the - // mapping should be 0->0, 1->1, 2->0, 3->1. - int toAddSmallIndex = bindex % smallTblBucketNum; - resultFileNames.add(smallTblFileNames.get(toAddSmallIndex)); - } else { - int jump = smallTblBucketNum / bigTblBucketNum; - for (int i = bindex; i < smallTblFileNames.size(); i = i + jump) { - resultFileNames.add(smallTblFileNames.get(i)); + if (smallTblFileNames.size() > 0) { + if (bigTblBucketNum >= smallTblBucketNum) { + // if the big table has more buckets than the current small table, + // use "MOD" to get small table bucket names. For example, if the big + // table has 4 buckets and the small table has 2 buckets, then the + // mapping should be 0->0, 1->1, 2->0, 3->1. 
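A tiny arithmetic sketch of the bucket mapping described in the comment above (big table with 4 buckets, small table with 2), assuming the usual bucket map join precondition that one bucket count divides the other; the class name is made up.

    public class BucketMappingSketch {
      public static void main(String[] args) {
        int bigTblBucketNum = 4;
        int smallTblBucketNum = 2;
        // Big table has more buckets, so each big-table bucket bindex reads the
        // small-table bucket at (bindex % smallTblBucketNum).
        for (int bindex = 0; bindex < bigTblBucketNum; bindex++) {
          System.out.println(bindex + " -> " + (bindex % smallTblBucketNum));
        }
        // Prints the mapping from the comment above: 0->0, 1->1, 2->0, 3->1.
      }
    }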
+ int toAddSmallIndex = bindex % smallTblBucketNum; + resultFileNames.add(smallTblFileNames.get(toAddSmallIndex)); + } else { + int jump = smallTblBucketNum / bigTblBucketNum; + for (int i = bindex; i < smallTblFileNames.size(); i = i + jump) { + resultFileNames.add(smallTblFileNames.get(i)); + } } } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java (working copy) @@ -517,15 +517,10 @@ if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(clz)) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; TypeInfo typeInfo = poi.getTypeInfo(); - // Handling parameterized types (varchar etc). - if (typeInfo.getTypeName().contains(serdeConstants.VARCHAR_TYPE_NAME) - || typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)) { - - // Do not support parameterized types. - return null; - } o = poi.getPrimitiveJavaObject(o); - if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME)) { + if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME) || + typeInfo.getTypeName().contains(serdeConstants.VARCHAR_TYPE_NAME) || + typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)) { return new ExprNodeConstantDesc(typeInfo, o); } } else if (PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(clz)) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java (working copy) @@ -163,7 +163,7 @@ // map join operator by default has no bucket cols and num of reduce sinks // reduced by 1 mapJoinOp - .setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks())); +.setOpTraits(new OpTraits(null, -1, null)); mapJoinOp.setStatistics(joinOp.getStatistics()); // propagate this change till the next RS for (Operator childOp : mapJoinOp.getChildOperators()) { @@ -178,8 +178,7 @@ TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException { // we cannot convert to bucket map join, we cannot convert to // map join either based on the size. Check if we can convert to SMB join. 
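The ConstantPropagateProcFactory hunk above starts folding char/varchar constants instead of bailing out on parameterized types. A hedged sketch of what such a folded constant could look like; TypeInfoFactory.getVarcharTypeInfo and the HiveVarchar wrapper are assumed from Hive's serde/common libraries and are not part of this patch.

    import org.apache.hadoop.hive.common.type.HiveVarchar;
    import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class FoldVarcharConstantSketch {
      public static void main(String[] args) {
        // Before the patch the folder returned null on parameterized types; now the
        // qualified TypeInfo is carried along with the folded value.
        TypeInfo varchar10 = TypeInfoFactory.getVarcharTypeInfo(10);
        ExprNodeConstantDesc folded =
            new ExprNodeConstantDesc(varchar10, new HiveVarchar("hello", 10));
        System.out.println(folded.getTypeInfo());   // varchar(10)
        System.out.println(folded.getExprString());
      }
    }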
- if ((context.conf.getBoolVar(HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN) == false) - || (joinOp.getOpTraits().getNumReduceSinks() >= 2)) { + if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN) == false) { convertJoinSMBJoin(joinOp, context, 0, 0, false); return null; } @@ -254,9 +253,9 @@ CommonMergeJoinOperator mergeJoinOp = (CommonMergeJoinOperator) OperatorFactory.get(new CommonMergeJoinDesc(numBuckets, mapJoinConversionPos, mapJoinDesc), joinOp.getSchema()); - int numReduceSinks = joinOp.getOpTraits().getNumReduceSinks(); - OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), numBuckets, joinOp - .getOpTraits().getSortCols(), numReduceSinks); + OpTraits opTraits = + new OpTraits(joinOp.getOpTraits().getBucketColNames(), numBuckets, joinOp.getOpTraits() + .getSortCols()); mergeJoinOp.setOpTraits(opTraits); mergeJoinOp.setStatistics(joinOp.getStatistics()); @@ -321,8 +320,7 @@ if (currentOp instanceof ReduceSinkOperator) { return; } - currentOp.setOpTraits(new OpTraits(null, -1, null, - currentOp.getOpTraits().getNumReduceSinks())); + currentOp.setOpTraits(new OpTraits(null, -1, null)); for (Operator childOp : currentOp.getChildOperators()) { if ((childOp instanceof ReduceSinkOperator) || (childOp instanceof GroupByOperator)) { break; @@ -345,7 +343,7 @@ // we can set the traits for this join operator OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), - tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks()); + tezBucketJoinProcCtx.getNumBuckets(), null); mapJoinOp.setOpTraits(opTraits); mapJoinOp.setStatistics(joinOp.getStatistics()); setNumberOfBucketsOnChildren(mapJoinOp); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MergeJoinProc.java (working copy) @@ -70,12 +70,11 @@ context.opMergeJoinWorkMap.put(mergeJoinOp, mergeWork); } + mergeWork.addMergedWork(null, parentWork, context.leafOperatorToFollowingWork); mergeWork.setMergeJoinOperator(mergeJoinOp); - mergeWork.addMergedWork(null, parentWork); tezWork.setVertexType(mergeWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES); for (BaseWork grandParentWork : tezWork.getParents(parentWork)) { - parentWork.setName(grandParentWork.getName()); TezEdgeProperty edgeProp = tezWork.getEdgeProperty(grandParentWork, parentWork); tezWork.disconnect(grandParentWork, parentWork); tezWork.connect(grandParentWork, mergeWork, edgeProp); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java (working copy) @@ -179,13 +179,44 @@ parentRS.getConf().setReducerTraits(EnumSet.of(FIXED)); numBuckets = (Integer) joinConf.getBigTableBucketNumMapping().values().toArray()[0]; - Operator rootOp = OperatorUtils.findSingleOperatorUpstream(mapJoinOp.getParentOperators() - .get(joinConf.getPosBigTable()), TableScanOperator.class); - - if (rootOp instanceof TableScanOperator) { // we will run in mapper - edgeType = EdgeType.CUSTOM_EDGE; - } else { // we will run in reducer - edgeType = EdgeType.CUSTOM_SIMPLE_EDGE; + /* + * Here, we can be in one of 4 states. + * + * 1. 
If map join work is null implies that we have not yet traversed the big table side. We + * just need to see if we can find a reduce sink operator in the big table side. This would + * imply a reduce side operation. + * + * 2. If we don't find a reducesink in 1 it has to be the case that it is a map side operation. + * + * 3. If we have already created a work item for the big table side, we need to see if we can + * find a table scan operator in the big table side. This would imply a map side operation. + * + * 4. If we don't find a table scan operator, it has to be a reduce side operation. + */ + if (mapJoinWork == null) { + Operator rootOp = + OperatorUtils.findSingleOperatorUpstream( + mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()), + ReduceSinkOperator.class); + if (rootOp == null) { + // likely we found a table scan operator + edgeType = EdgeType.CUSTOM_EDGE; + } else { + // we have found a reduce sink + edgeType = EdgeType.CUSTOM_SIMPLE_EDGE; + } + } else { + Operator rootOp = + OperatorUtils.findSingleOperatorUpstream( + mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()), + TableScanOperator.class); + if (rootOp != null) { + // likely we found a table scan operator + edgeType = EdgeType.CUSTOM_EDGE; + } else { + // we have found a reduce sink + edgeType = EdgeType.CUSTOM_SIMPLE_EDGE; + } } } TezEdgeProperty edgeProp = new TezEdgeProperty(null, edgeType, numBuckets); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java (working copy) @@ -29,6 +29,16 @@ private static final long serialVersionUID = 1L; + public enum UnsupportedFeature { + Distinct_without_an_aggreggation, Duplicates_in_RR, Filter_expression_with_non_boolean_return_type, + Having_clause_without_any_groupby, Hint, Invalid_column_reference, Invalid_decimal, + Less_than_equal_greater_than, Multi_insert, Others, Same_name_in_multiple_expressions, + Schema_less_table, Select_alias_in_having_clause, Select_transform, Subquery, + Table_sample_clauses, UDTF, Union_type, Unique_join + }; + + private UnsupportedFeature unsupportedFeature; + public CalciteSemanticException() { super(); } @@ -37,6 +47,11 @@ super(message); } + public CalciteSemanticException(String message, UnsupportedFeature feature) { + super(message); + this.setUnsupportedFeature(feature); + } + public CalciteSemanticException(Throwable cause) { super(cause); } @@ -48,4 +63,13 @@ public CalciteSemanticException(ErrorMsg errorMsg, String... 
msgArgs) { super(errorMsg, msgArgs); } + + public UnsupportedFeature getUnsupportedFeature() { + return unsupportedFeature; + } + + public void setUnsupportedFeature(UnsupportedFeature unsupportedFeature) { + this.unsupportedFeature = unsupportedFeature; + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java (working copy) @@ -32,8 +32,16 @@ import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexDynamicParam; +import org.apache.calcite.rex.RexFieldAccess; import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexLocalRef; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexOver; +import org.apache.calcite.rex.RexRangeRef; import org.apache.calcite.rex.RexVisitor; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.sql.SqlKind; @@ -535,6 +543,7 @@ boolean deterministic = true; RexVisitor visitor = new RexVisitorImpl(true) { + @Override public Void visitCall(org.apache.calcite.rex.RexCall call) { if (!call.getOperator().isDeterministic()) { throw new Util.FoundOne(call); @@ -551,4 +560,59 @@ return deterministic; } + + /** + * Walks over an expression and determines whether it is constant. + */ + public static class ConstantFinder implements RexVisitor { + + @Override + public Boolean visitLiteral(RexLiteral literal) { + return true; + } + + @Override + public Boolean visitInputRef(RexInputRef inputRef) { + return false; + } + + @Override + public Boolean visitLocalRef(RexLocalRef localRef) { + throw new RuntimeException("Not expected to be called."); + } + + @Override + public Boolean visitOver(RexOver over) { + return false; + } + + @Override + public Boolean visitCorrelVariable(RexCorrelVariable correlVariable) { + return false; + } + + @Override + public Boolean visitDynamicParam(RexDynamicParam dynamicParam) { + return false; + } + + @Override + public Boolean visitCall(RexCall call) { + // Constant if operator is deterministic and all operands are + // constant. + return call.getOperator().isDeterministic() + && RexVisitorImpl.visitArrayAnd(this, call.getOperands()); + } + + @Override + public Boolean visitRangeRef(RexRangeRef rangeRef) { + return false; + } + + @Override + public Boolean visitFieldAccess(RexFieldAccess fieldAccess) { + // ".FIELD" is constant iff "" is constant. 
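The CalciteSemanticException changes above attach an UnsupportedFeature tag to each CBO bail-out. A minimal sketch of how a caller could surface that tag; the surrounding class and method names are hypothetical.

    import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
    import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;

    public class CboFallbackSketch {
      // Stand-in for a planning step that gives up on CBO.
      static void planWithCbo() throws CalciteSemanticException {
        throw new CalciteSemanticException("Union type is not supported",
            UnsupportedFeature.Union_type);
      }

      public static void main(String[] args) {
        try {
          planWithCbo();
        } catch (CalciteSemanticException e) {
          // The enum makes it possible to log or aggregate why CBO was skipped.
          System.out.println("CBO disabled due to: " + e.getUnsupportedFeature());
        }
      }
    }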
+ return fieldAccess.getReferenceExpr().accept(this); + } + } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveProject.java (working copy) @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; import com.google.common.collect.ImmutableList; @@ -87,7 +88,7 @@ // 1 Ensure columnNames are unique - CALCITE-411 if (fieldNames != null && !Util.isDistinct(fieldNames)) { String msg = "Select list contains multiple expressions with the same name." + fieldNames; - throw new CalciteSemanticException(msg); + throw new CalciteSemanticException(msg, UnsupportedFeature.Same_name_in_multiple_expressions); } RelDataType rowType = RexUtil.createStructType(cluster.getTypeFactory(), exps, fieldNames); return create(cluster, child, exps, rowType, Collections. emptyList()); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java (working copy) @@ -38,6 +38,9 @@ import org.apache.calcite.rel.rules.MultiJoin; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.SqlKind; @@ -168,7 +171,27 @@ ImmutableMap.Builder inputRefToCallMapBldr = ImmutableMap.builder(); for (int i = resultSchema.size(); i < rt.getFieldCount(); i++) { if (collationInputRefs.contains(i)) { - inputRefToCallMapBldr.put(i, obChild.getChildExps().get(i)); + RexNode obyExpr = obChild.getChildExps().get(i); + if (obyExpr instanceof RexCall) { + int a = -1; + List operands = new ArrayList<>(); + for (int k = 0; k< ((RexCall) obyExpr).operands.size(); k++) { + RexNode rn = ((RexCall) obyExpr).operands.get(k); + for (int j = 0; j < resultSchema.size(); j++) { + if( obChild.getChildExps().get(j).toString().equals(rn.toString())) { + a = j; + break; + } + } if (a != -1) { + operands.add(new RexInputRef(a, rn.getType())); + } else { + operands.add(rn); + } + a = -1; + } + obyExpr = obChild.getCluster().getRexBuilder().makeCall(((RexCall)obyExpr).getOperator(), operands); + } + inputRefToCallMapBldr.put(i, obyExpr); } } ImmutableMap inputRefToCallMap = inputRefToCallMapBldr.build(); @@ -266,7 +289,7 @@ RelNode select = introduceDerivedTable(rel); parent.replaceInput(pos, select); - + return select; } @@ -352,7 +375,7 @@ return validChild; } - + private static boolean isEmptyGrpAggr(RelNode gbNode) { // Verify if both groupset and aggrfunction are empty) Aggregate aggrnode = (Aggregate) gbNode; @@ -361,12 +384,12 @@ } 
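The PlanModifierForASTConv hunk above rewrites ORDER BY call operands into input references when they textually match a projected child expression. A deliberately non-Calcite sketch of that matching step, with plain strings standing in for RexNode digests and "ref#n" standing in for RexInputRef:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class ObyOperandRewriteSketch {
      public static void main(String[] args) {
        // Stand-ins for obChild.getChildExps() and the ORDER BY RexCall operands.
        List<String> projectedExprs = Arrays.asList("+($0, $1)", "$2");
        List<String> obyCallOperands = Arrays.asList("+($0, $1)", "10");

        List<String> rewritten = new ArrayList<String>();
        for (String operand : obyCallOperands) {
          int pos = projectedExprs.indexOf(operand);
          // Matching operands become a reference to the projection position,
          // everything else is carried through unchanged.
          rewritten.add(pos != -1 ? "ref#" + pos : operand);
        }
        System.out.println(rewritten); // [ref#0, 10]
      }
    }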
return false; } - + private static void replaceEmptyGroupAggr(final RelNode rel, RelNode parent) { // If this function is called, the parent should only include constant List exps = parent.getChildExps(); for (RexNode rexNode : exps) { - if (rexNode.getKind() != SqlKind.LITERAL) { + if (!rexNode.accept(new HiveCalciteUtil.ConstantFinder())) { throw new RuntimeException("We expect " + parent.toString() + " to contain only constants. However, " + rexNode.toString() + " is " + rexNode.getKind()); @@ -377,7 +400,7 @@ RelDataType longType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, typeFactory); RelDataType intType = TypeConverter.convert(TypeInfoFactory.intTypeInfo, typeFactory); // Create the dummy aggregation. - SqlAggFunction countFn = (SqlAggFunction) SqlFunctionConverter.getCalciteAggFn("count", + SqlAggFunction countFn = SqlFunctionConverter.getCalciteAggFn("count", ImmutableList.of(intType), longType); // TODO: Using 0 might be wrong; might need to walk down to find the // proper index of a dummy. Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java (working copy) @@ -53,6 +53,7 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -148,7 +149,7 @@ // This may happen for schema-less tables, where columns are dynamically // supplied by serdes. throw new CalciteSemanticException("Unexpected rexnode : " - + rexNode.getClass().getCanonicalName()); + + rexNode.getClass().getCanonicalName(), UnsupportedFeature.Schema_less_table); } } @@ -352,7 +353,7 @@ // For now, we will not run CBO in the presence of invalid decimal // literals. throw new CalciteSemanticException("Expression " + literal.getExprString() - + " is not a valid decimal"); + + " is not a valid decimal", UnsupportedFeature.Invalid_decimal); // TODO: return createNullLiteral(literal); } BigDecimal bd = (BigDecimal) value; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java (working copy) @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.ParseDriver; @@ -382,7 +383,7 @@ // We can create Calcite IS_DISTINCT_FROM operator for this. 
But since our // join reordering algo cant handle this anyway there is no advantage of // this.So, bail out for now. - throw new CalciteSemanticException("<=> is not yet supported for cbo."); + throw new CalciteSemanticException("<=> is not yet supported for cbo.", UnsupportedFeature.Less_than_equal_greater_than); } SqlOperator calciteOp = hiveToCalcite.get(hiveUdfName); if (calciteOp == null) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java (working copy) @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter.HiveToken; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.RowResolver; @@ -228,7 +229,7 @@ public static RelDataType convert(UnionTypeInfo unionType, RelDataTypeFactory dtFactory) throws CalciteSemanticException{ // Union type is not supported in Calcite. - throw new CalciteSemanticException("Union type is not supported"); + throw new CalciteSemanticException("Union type is not supported", UnsupportedFeature.Union_type); } public static TypeInfo convert(RelDataType rType) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java (working copy) @@ -50,19 +50,19 @@ * 1. Bucketing columns. * 2. Table * 3. Pruned partitions - * + * * Bucketing columns refer to not to the bucketing columns from the table object but instead * to the dynamic 'bucketing' done by operators such as reduce sinks and group-bys. * All the operators have a translation from their input names to the output names corresponding * to the bucketing column. The colExprMap that is a part of every operator is used in this * transformation. - * + * * The table object is used for the base-case in map-reduce when deciding to perform a bucket * map join. This object is used in the BucketMapJoinProc to find if number of files for the * table correspond to the number of buckets specified in the meta data. - * + * * The pruned partition information has the same purpose as the table object at the moment. - * + * * The traits of sorted-ness etc. can be populated as well for future optimizations to make use of. 
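The OpTraits-related hunks above (ConvertJoinMapJoin, OpTraitsRulesProcFactory) drop the numReduceSinks argument, leaving only bucket columns, bucket count and sort columns. A small sketch of constructing such traits, assuming OpTraits lives in org.apache.hadoop.hive.ql.plan and exposes the three-argument constructor and getters this patch switches the call sites to:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.ql.plan.OpTraits;

    public class OpTraitsSketch {
      public static void main(String[] args) {
        List<List<String>> bucketCols = new ArrayList<List<String>>();
        bucketCols.add(Arrays.asList("key"));
        // -1 bucket count means "unknown / not usable for bucket map join".
        OpTraits traits = new OpTraits(bucketCols, -1, bucketCols);
        System.out.println(traits.getNumBuckets());
        System.out.println(traits.getBucketColNames());
      }
    }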
*/ @@ -106,13 +106,11 @@ List> listBucketCols = new ArrayList>(); listBucketCols.add(bucketCols); int numBuckets = -1; - int numReduceSinks = 1; OpTraits parentOpTraits = rs.getParentOperators().get(0).getConf().getTraits(); if (parentOpTraits != null) { numBuckets = parentOpTraits.getNumBuckets(); - numReduceSinks += parentOpTraits.getNumReduceSinks(); } - OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listBucketCols, numReduceSinks); + OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listBucketCols); rs.setOpTraits(opTraits); return null; } @@ -132,8 +130,8 @@ // construct a mapping of (Partition->bucket file names) and (Partition -> bucket number) if (!partitions.isEmpty()) { for (Partition p : partitions) { - List fileNames = - AbstractBucketJoinProc.getBucketFilePathsOfPartition(p.getDataLocation(), + List fileNames = + AbstractBucketJoinProc.getBucketFilePathsOfPartition(p.getDataLocation(), pGraphContext); // The number of files for the table should be same as number of // buckets. @@ -146,8 +144,8 @@ } } else { - List fileNames = - AbstractBucketJoinProc.getBucketFilePathsOfPartition(tbl.getDataLocation(), + List fileNames = + AbstractBucketJoinProc.getBucketFilePathsOfPartition(tbl.getDataLocation(), pGraphContext); Integer num = new Integer(tbl.getNumBuckets()); @@ -188,7 +186,7 @@ sortedColsList.add(sortCols); } // num reduce sinks hardcoded to 0 because TS has no parents - OpTraits opTraits = new OpTraits(bucketColsList, numBuckets, sortedColsList, 0); + OpTraits opTraits = new OpTraits(bucketColsList, numBuckets, sortedColsList); ts.setOpTraits(opTraits); return null; } @@ -213,13 +211,8 @@ } List> listBucketCols = new ArrayList>(); - int numReduceSinks = 0; - OpTraits parentOpTraits = gbyOp.getParentOperators().get(0).getOpTraits(); - if (parentOpTraits != null) { - numReduceSinks = parentOpTraits.getNumReduceSinks(); - } listBucketCols.add(gbyKeys); - OpTraits opTraits = new OpTraits(listBucketCols, -1, listBucketCols, numReduceSinks); + OpTraits opTraits = new OpTraits(listBucketCols, -1, listBucketCols); gbyOp.setOpTraits(opTraits); return null; } @@ -255,7 +248,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { SelectOperator selOp = (SelectOperator) nd; - List> parentBucketColNames = + List> parentBucketColNames = selOp.getParentOperators().get(0).getOpTraits().getBucketColNames(); List> listBucketCols = null; @@ -264,7 +257,7 @@ if (parentBucketColNames != null) { listBucketCols = getConvertedColNames(parentBucketColNames, selOp); } - List> parentSortColNames = + List> parentSortColNames = selOp.getParentOperators().get(0).getOpTraits().getSortCols(); if (parentSortColNames != null) { listSortCols = getConvertedColNames(parentSortColNames, selOp); @@ -272,13 +265,11 @@ } int numBuckets = -1; - int numReduceSinks = 0; OpTraits parentOpTraits = selOp.getParentOperators().get(0).getOpTraits(); if (parentOpTraits != null) { numBuckets = parentOpTraits.getNumBuckets(); - numReduceSinks = parentOpTraits.getNumReduceSinks(); } - OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listSortCols, numReduceSinks); + OpTraits opTraits = new OpTraits(listBucketCols, numBuckets, listSortCols); selOp.setOpTraits(opTraits); return null; } @@ -307,13 +298,10 @@ OpTraits parentOpTraits = rsOp.getOpTraits(); bucketColsList.add(getOutputColNames(joinOp, parentOpTraits.getBucketColNames(), pos)); sortColsList.add(getOutputColNames(joinOp, parentOpTraits.getSortCols(), pos)); - if (parentOpTraits.getNumReduceSinks() > numReduceSinks) { - numReduceSinks = parentOpTraits.getNumReduceSinks(); - } pos++; } - joinOp.setOpTraits(new OpTraits(bucketColsList, -1, bucketColsList, numReduceSinks)); + joinOp.setOpTraits(new OpTraits(bucketColsList, -1, bucketColsList)); return null; } @@ -366,17 +354,7 @@ Object... nodeOutputs) throws SemanticException { @SuppressWarnings("unchecked") Operator operator = (Operator) nd; - - int numReduceSinks = 0; - for (Operator parentOp : operator.getParentOperators()) { - if (parentOp.getOpTraits() == null) { - continue; - } - if (parentOp.getOpTraits().getNumReduceSinks() > numReduceSinks) { - numReduceSinks = parentOp.getOpTraits().getNumReduceSinks(); - } - } - OpTraits opTraits = new OpTraits(null, -1, null, numReduceSinks); + OpTraits opTraits = new OpTraits(null, -1, null); operator.setOpTraits(opTraits); return null; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (working copy) @@ -136,7 +136,8 @@ Set supportedAggregationUdfs = new HashSet(); - private PhysicalContext physicalContext = null;; + private PhysicalContext physicalContext = null; + private HiveConf hiveConf; public Vectorizer() { @@ -286,13 +287,13 @@ class VectorizationDispatcher implements Dispatcher { - private final PhysicalContext pctx; + private final PhysicalContext physicalContext; private List reduceColumnNames; private List reduceTypeInfos; - public VectorizationDispatcher(PhysicalContext pctx) { - this.pctx = pctx; + public VectorizationDispatcher(PhysicalContext physicalContext) { + this.physicalContext = physicalContext; reduceColumnNames = null; reduceTypeInfos = null; } @@ -310,7 +311,7 @@ convertMapWork((MapWork) w, true); } else if (w instanceof ReduceWork) { // We are only vectorizing Reduce under Tez. 
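The Vectorizer hunks above route all configuration lookups through a single hiveConf field instead of pctx.getConf(). A minimal sketch of the reduce-side gate being read, assuming HIVE_VECTORIZATION_REDUCE_ENABLED corresponds to hive.vectorized.execution.reduce.enabled:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class ReduceVectorizationFlagSketch {
      public static void main(String[] args) {
        HiveConf hiveConf = new HiveConf();
        // Same check the dispatcher performs before converting a ReduceWork.
        boolean reduceVectorizationEnabled = HiveConf.getBoolVar(hiveConf,
            HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED);
        System.out.println("Vectorize ReduceWork: " + reduceVectorizationEnabled);
      }
    }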
- if (HiveConf.getBoolVar(pctx.getConf(), + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { convertReduceWork((ReduceWork) w); } @@ -322,7 +323,7 @@ if (baseWork instanceof MapWork) { convertMapWork((MapWork) baseWork, false); } else if (baseWork instanceof ReduceWork - && HiveConf.getBoolVar(pctx.getConf(), + && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { convertReduceWork((ReduceWork) baseWork); } @@ -393,13 +394,12 @@ HashMap nodeOutput = new HashMap(); ogw.startWalking(topNodes, nodeOutput); - Map> allScratchColumnVectorTypeMaps = vnp.getAllScratchColumnVectorTypeMaps(); - mapWork.setAllScratchColumnVectorTypeMaps(allScratchColumnVectorTypeMaps); - Map> allColumnVectorMaps = vnp.getAllColumnVectorMaps(); - mapWork.setAllColumnVectorMaps(allColumnVectorMaps); + mapWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap()); + mapWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap()); + mapWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap()); if (LOG.isDebugEnabled()) { - debugDisplayAllMaps(allColumnVectorMaps, allScratchColumnVectorTypeMaps); + debugDisplayAllMaps(mapWork); } return; @@ -495,7 +495,7 @@ // VectorizationContext... Do we use PreOrderWalker instead of DefaultGraphWalker. Map opRules = new LinkedHashMap(); ReduceWorkVectorizationNodeProcessor vnp = - new ReduceWorkVectorizationNodeProcessor(reduceColumnNames); + new ReduceWorkVectorizationNodeProcessor(reduceColumnNames, reduceTypeInfos); addReduceWorkRules(opRules, vnp); Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); GraphWalker ogw = new PreOrderWalker(disp); @@ -510,14 +510,12 @@ // Necessary since we are vectorizing the root operator in reduce. reduceWork.setReducer(vnp.getRootVectorOp()); - Map> allScratchColumnVectorTypeMaps = vnp.getAllScratchColumnVectorTypeMaps(); - reduceWork.setAllScratchColumnVectorTypeMaps(allScratchColumnVectorTypeMaps); - Map> allColumnVectorMaps = vnp.getAllColumnVectorMaps(); - reduceWork.setAllColumnVectorMaps(allColumnVectorMaps); + reduceWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap()); + reduceWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap()); + reduceWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap()); - if (LOG.isDebugEnabled()) { - debugDisplayAllMaps(allColumnVectorMaps, allScratchColumnVectorTypeMaps); + debugDisplayAllMaps(reduceWork); } } } @@ -574,38 +572,34 @@ // ReduceWorkVectorizationNodeProcessor. class VectorizationNodeProcessor implements NodeProcessor { - // This is used to extract scratch column types for each file key - protected final Map scratchColumnContext = - new HashMap(); + // The vectorization context for the Map or Reduce task. + protected VectorizationContext taskVectorizationContext; - protected final Map, VectorizationContext> vContextsByOp = - new HashMap, VectorizationContext>(); + // The input projection column type name map for the Map or Reduce task. 
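walkStackToFindVectorizationContext (reworked above) walks the operator stack from the current operator toward the root and uses the output context of the nearest vectorized parent that publishes a new one, falling back to the task-level context otherwise. A loose, JDK-only sketch of that walk, with strings standing in for operators and vectorization contexts:

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Stack;

    public class WalkStackSketch {
      public static void main(String[] args) {
        Stack<String> operatorStack = new Stack<String>();
        operatorStack.push("TS");   // table scan: covered by the task context
        operatorStack.push("SEL");
        operatorStack.push("GBY");  // suppose GBY publishes a new output context
        operatorStack.push("RS");   // current operator (top of stack)

        Map<String, String> newContextByOp = new HashMap<String, String>();
        newContextByOp.put("GBY", "ctx-after-groupby");
        String taskContext = "task-ctx";

        String found = null;
        // Start at the parent of the current operator and walk toward the root.
        for (int i = operatorStack.size() - 2; i >= 0 && found == null; i--) {
          found = newContextByOp.get(operatorStack.get(i));
        }
        System.out.println(found != null ? found : taskContext); // ctx-after-groupby
      }
    }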
+ protected Map taskColumnTypeNameMap; - protected final Set> opsDone = - new HashSet>(); + VectorizationNodeProcessor() { + taskColumnTypeNameMap = new HashMap(); + } - public Map> getAllScratchColumnVectorTypeMaps() { - Map> allScratchColumnVectorTypeMaps = - new HashMap>(); - for (String onefile : scratchColumnContext.keySet()) { - VectorizationContext vc = scratchColumnContext.get(onefile); - Map cmap = vc.getScratchColumnTypeMap(); - allScratchColumnVectorTypeMaps.put(onefile, cmap); - } - return allScratchColumnVectorTypeMaps; + public Map getVectorColumnNameMap() { + return taskVectorizationContext.getProjectionColumnMap(); } - public Map> getAllColumnVectorMaps() { - Map> allColumnVectorMaps = - new HashMap>(); - for(String oneFile: scratchColumnContext.keySet()) { - VectorizationContext vc = scratchColumnContext.get(oneFile); - Map cmap = vc.getProjectionColumnMap(); - allColumnVectorMaps.put(oneFile, cmap); - } - return allColumnVectorMaps; + public Map getVectorColumnTypeMap() { + return taskColumnTypeNameMap; } + public Map getVectorScratchColumnTypeMap() { + return taskVectorizationContext.getScratchColumnTypeMap(); + } + + protected final Set> opsDone = + new HashSet>(); + + protected final Map, Operator> opToVectorOpMap = + new HashMap, Operator>(); + public VectorizationContext walkStackToFindVectorizationContext(Stack stack, Operator op) throws SemanticException { VectorizationContext vContext = null; @@ -622,7 +616,18 @@ return null; } Operator opParent = (Operator) stack.get(i); - vContext = vContextsByOp.get(opParent); + Operator vectorOpParent = opToVectorOpMap.get(opParent); + if (vectorOpParent != null) { + if (vectorOpParent instanceof VectorizationContextRegion) { + VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOpParent; + vContext = vcRegion.getOuputVectorizationContext(); + LOG.info("walkStackToFindVectorizationContext " + vectorOpParent.getName() + " has new vectorization context " + vContext.toString()); + } else { + LOG.info("walkStackToFindVectorizationContext " + vectorOpParent.getName() + " does not have new vectorization context"); + } + } else { + LOG.info("walkStackToFindVectorizationContext " + opParent.getName() + " is not vectorized"); + } --i; } return vContext; @@ -636,14 +641,9 @@ vectorOp = vectorizeOperator(op, vContext); opsDone.add(op); if (vectorOp != op) { + opToVectorOpMap.put(op, vectorOp); opsDone.add(vectorOp); } - if (vectorOp instanceof VectorizationContextRegion) { - VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp; - VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext(); - vContextsByOp.put(op, vOutContext); - scratchColumnContext.put(vOutContext.getFileKey(), vOutContext); - } } } catch (HiveException e) { throw new SemanticException(e); @@ -663,6 +663,7 @@ private final MapWork mWork; public MapWorkVectorizationNodeProcessor(MapWork mWork) { + super(); this.mWork = mWork; } @@ -671,41 +672,26 @@ Object... 
nodeOutputs) throws SemanticException { Operator op = (Operator) nd; - LOG.info("MapWorkVectorizationNodeProcessor processing Operator: " + op.getName() + "..."); VectorizationContext vContext = null; if (op instanceof TableScanOperator) { - vContext = getVectorizationContext(op, physicalContext); - for (String onefile : mWork.getPathToAliases().keySet()) { - List aliases = mWork.getPathToAliases().get(onefile); - for (String alias : aliases) { - Operator opRoot = mWork.getAliasToWork().get(alias); - if (op == opRoot) { - // The same vectorization context is copied multiple times into - // the MapWork scratch columnMap - // Each partition gets a copy - // - vContext.setFileKey(onefile); - scratchColumnContext.put(onefile, vContext); - if (LOG.isDebugEnabled()) { - LOG.debug("Vectorized MapWork operator " + op.getName() + " vectorization context " + vContext.toString()); - } - break; - } - } + if (taskVectorizationContext == null) { + taskVectorizationContext = getVectorizationContext(op.getSchema(), op.getName(), + taskColumnTypeNameMap); } - vContextsByOp.put(op, vContext); + vContext = taskVectorizationContext; } else { + LOG.info("MapWorkVectorizationNodeProcessor process going to walk the operator stack to get vectorization context for " + op.getName()); vContext = walkStackToFindVectorizationContext(stack, op); if (vContext == null) { - throw new SemanticException( - String.format("Did not find vectorization context for operator %s in operator stack", - op.getName())); + // No operator has "pushed" a new context -- so use the task vectorization context. + vContext = taskVectorizationContext; } } assert vContext != null; + LOG.info("MapWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString()); // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't // vectorize the operators below it. @@ -720,9 +706,10 @@ Operator vectorOp = doVectorize(op, vContext); if (LOG.isDebugEnabled()) { - LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " vectorization context " + vContext.toString()); if (vectorOp instanceof VectorizationContextRegion) { - LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added vectorization context " + vContext.toString()); + VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp; + VectorizationContext vNewContext = vcRegion.getOuputVectorizationContext(); + LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString()); } } @@ -733,19 +720,20 @@ class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor { private final List reduceColumnNames; + private final List reduceTypeInfos; - private VectorizationContext reduceShuffleVectorizationContext; - private Operator rootVectorOp; public Operator getRootVectorOp() { return rootVectorOp; } - public ReduceWorkVectorizationNodeProcessor(List reduceColumnNames) { + public ReduceWorkVectorizationNodeProcessor(List reduceColumnNames, + List reduceTypeInfos) { + super(); this.reduceColumnNames = reduceColumnNames; + this.reduceTypeInfos = reduceTypeInfos; rootVectorOp = null; - reduceShuffleVectorizationContext = null; } @Override @@ -753,8 +741,6 @@ Object... 
nodeOutputs) throws SemanticException { Operator op = (Operator) nd; - LOG.info("ReduceWorkVectorizationNodeProcessor processing Operator: " + - op.getName() + "..."); VectorizationContext vContext = null; @@ -763,25 +749,30 @@ if (op.getParentOperators().size() == 0) { LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + reduceColumnNames.toString()); - vContext = new VectorizationContext(reduceColumnNames); - vContext.setFileKey("_REDUCE_SHUFFLE_"); - scratchColumnContext.put("_REDUCE_SHUFFLE_", vContext); - reduceShuffleVectorizationContext = vContext; + vContext = new VectorizationContext("__Reduce_Shuffle__", reduceColumnNames); + taskVectorizationContext = vContext; + int i = 0; + for (TypeInfo typeInfo : reduceTypeInfos) { + taskColumnTypeNameMap.put(i, typeInfo.getTypeName()); + i++; + } saveRootVectorOp = true; if (LOG.isDebugEnabled()) { LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context " + vContext.toString()); } } else { + LOG.info("ReduceWorkVectorizationNodeProcessor process going to walk the operator stack to get vectorization context for " + op.getName()); vContext = walkStackToFindVectorizationContext(stack, op); if (vContext == null) { // If we didn't find a context among the operators, assume the top -- reduce shuffle's // vectorization context. - vContext = reduceShuffleVectorizationContext; + vContext = taskVectorizationContext; } } assert vContext != null; + LOG.info("ReduceWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString()); // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't // vectorize the operators below it. @@ -796,9 +787,10 @@ Operator vectorOp = doVectorize(op, vContext); if (LOG.isDebugEnabled()) { - LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " vectorization context " + vContext.toString()); if (vectorOp instanceof VectorizationContextRegion) { - LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vContext.toString()); + VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp; + VectorizationContext vNewContext = vcRegion.getOuputVectorizationContext(); + LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString()); } } if (vectorOp instanceof VectorGroupByOperator) { @@ -816,7 +808,7 @@ private static class ValidatorVectorizationContext extends VectorizationContext { private ValidatorVectorizationContext() { - super(); + super("No Name"); } @Override @@ -831,25 +823,27 @@ } @Override - public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { - this.physicalContext = pctx; - boolean vectorPath = HiveConf.getBoolVar(pctx.getConf(), + public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticException { + this.physicalContext = physicalContext; + hiveConf = physicalContext.getConf(); + + boolean vectorPath = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); if (!vectorPath) { LOG.info("Vectorization is disabled"); - return pctx; + return physicalContext; } // create dispatcher and graph walker - Dispatcher disp = new VectorizationDispatcher(pctx); + Dispatcher disp = new VectorizationDispatcher(physicalContext); TaskGraphWalker ogw = new TaskGraphWalker(disp); // get all the tasks nodes from root task ArrayList topNodes = new ArrayList(); - topNodes.addAll(pctx.getRootTasks()); + 
topNodes.addAll(physicalContext.getRootTasks()); // begin to walk through the task tree. ogw.startWalking(topNodes, null); - return pctx; + return physicalContext; } boolean validateMapWorkOperator(Operator op, MapWork mWork, boolean isTez) { @@ -901,7 +895,7 @@ } break; case GROUPBY: - if (HiveConf.getBoolVar(physicalContext.getConf(), + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) { ret = validateGroupByOperator((GroupByOperator) op, true, true); } else { @@ -1262,20 +1256,24 @@ return supportedDataTypesPattern.matcher(type.toLowerCase()).matches(); } - private VectorizationContext getVectorizationContext(Operator op, - PhysicalContext pctx) { - RowSchema rs = op.getSchema(); + private VectorizationContext getVectorizationContext(RowSchema rowSchema, String contextName, + Map typeNameMap) { + VectorizationContext vContext = new VectorizationContext(contextName); + // Add all non-virtual columns to make a vectorization context for // the TableScan operator. - VectorizationContext vContext = new VectorizationContext(); - for (ColumnInfo c : rs.getSignature()) { + int i = 0; + for (ColumnInfo c : rowSchema.getSignature()) { // Earlier, validation code should have eliminated virtual columns usage (HIVE-5560). if (!isVirtualColumn(c)) { vContext.addInitialColumn(c.getInternalName()); + typeNameMap.put(i, c.getTypeName()); + i++; } } vContext.finishedAddingInitialColumns(); + return vContext; } @@ -1333,40 +1331,14 @@ return false; } - public void debugDisplayAllMaps(Map> allColumnVectorMaps, - Map> allScratchColumnVectorTypeMaps) { + public void debugDisplayAllMaps(BaseWork work) { - // Context keys grow in length since they are a path... - Comparator comparerShorterString = new Comparator() { - @Override - public int compare(String o1, String o2) { - Integer length1 = o1.length(); - Integer length2 = o2.length(); - return length1.compareTo(length2); - }}; + Map columnNameMap = work.getVectorColumnNameMap(); + Map columnTypeMap = work.getVectorColumnTypeMap(); + Map scratchColumnTypeMap = work.getVectorScratchColumnTypeMap(); - Comparator comparerInteger = new Comparator() { - @Override - public int compare(Integer o1, Integer o2) { - return o1.compareTo(o2); - }}; - - Map> sortedAllColumnVectorMaps = new TreeMap>(comparerShorterString); - for (Map.Entry> entry : allColumnVectorMaps.entrySet()) { - Map sortedColumnMap = new TreeMap(comparerInteger); - for (Map.Entry innerEntry : entry.getValue().entrySet()) { - sortedColumnMap.put(innerEntry.getValue(), innerEntry.getKey()); - } - sortedAllColumnVectorMaps.put(entry.getKey(), sortedColumnMap); - } - LOG.debug("sortedAllColumnVectorMaps " + sortedAllColumnVectorMaps); - - Map> sortedAllScratchColumnVectorTypeMap = new TreeMap>(comparerShorterString); - for (Map.Entry> entry : allScratchColumnVectorTypeMaps.entrySet()) { - Map sortedScratchColumnTypeMap = new TreeMap(comparerInteger); - sortedScratchColumnTypeMap.putAll(entry.getValue()); - sortedAllScratchColumnVectorTypeMap.put(entry.getKey(), sortedScratchColumnTypeMap); - } - LOG.debug("sortedAllScratchColumnVectorTypeMap " + sortedAllScratchColumnVectorTypeMap); + LOG.debug("debugDisplayAllMaps columnNameMap " + columnNameMap.toString()); + LOG.debug("debugDisplayAllMaps columnTypeMap " + columnTypeMap.toString()); + LOG.debug("debugDisplayAllMaps scratchColumnTypeMap " + scratchColumnTypeMap.toString()); } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (working copy) @@ -196,18 +196,13 @@ // Remove all parts that are not partition columns. See javadoc for details. ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone()); String oldFilter = prunerExpr.getExprString(); - if (isBooleanExpr(compactExpr)) { - // For null and true values, return every partition - if (!isFalseExpr(compactExpr)) { - // Non-strict mode, and all the predicates are on non-partition columns - get everything. - if (LOG.isDebugEnabled()) { - LOG.debug("Filter " + oldFilter + " was null after compacting"); - } - return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap); - } else { - return new PrunedPartitionList(tab, new LinkedHashSet(new ArrayList()), - new ArrayList(), false); - } + if (compactExpr == null || isBooleanExpr(compactExpr)) { + if (isFalseExpr(compactExpr)) { + return new PrunedPartitionList( + tab, new LinkedHashSet(0), new ArrayList(0), false); + } + // For null and true values, return every partition + return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap); } if (LOG.isDebugEnabled()) { LOG.debug("Filter w/ compacting: " + compactExpr.getExprString() @@ -241,22 +236,22 @@ partsCache.put(key, ppList); return ppList; } - + static private boolean isBooleanExpr(ExprNodeDesc expr) { - return expr != null && expr instanceof ExprNodeConstantDesc && + return expr != null && expr instanceof ExprNodeConstantDesc && ((ExprNodeConstantDesc)expr).getTypeInfo() instanceof PrimitiveTypeInfo && ((PrimitiveTypeInfo)(((ExprNodeConstantDesc)expr).getTypeInfo())). - getTypeName().equals(serdeConstants.BOOLEAN_TYPE_NAME); + getTypeName().equals(serdeConstants.BOOLEAN_TYPE_NAME); } static private boolean isTrueExpr(ExprNodeDesc expr) { - return isBooleanExpr(expr) && - ((ExprNodeConstantDesc)expr).getValue() != null && - ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.TRUE); + return isBooleanExpr(expr) && + ((ExprNodeConstantDesc)expr).getValue() != null && + ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.TRUE); } static private boolean isFalseExpr(ExprNodeDesc expr) { - return isBooleanExpr(expr) && + return isBooleanExpr(expr) && ((ExprNodeConstantDesc)expr).getValue() != null && - ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.FALSE); + ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.FALSE); } /** @@ -268,42 +263,48 @@ */ static private ExprNodeDesc compactExpr(ExprNodeDesc expr) { // If this is a constant boolean expression, return the value. 
- if (expr == null) { - return null; - } - if (expr instanceof ExprNodeConstantDesc) { - if (isBooleanExpr(expr)) { - return expr; - } else { - throw new IllegalStateException("Unexpected non-null ExprNodeConstantDesc: " - + expr.getExprString()); + if (expr == null) { + return null; + } + if (expr instanceof ExprNodeConstantDesc) { + if (((ExprNodeConstantDesc)expr).getValue() == null) return null; + if (!isBooleanExpr(expr)) { + throw new IllegalStateException("Unexpected non-boolean ExprNodeConstantDesc: " + + expr.getExprString()); } + return expr; } else if (expr instanceof ExprNodeGenericFuncDesc) { GenericUDF udf = ((ExprNodeGenericFuncDesc)expr).getGenericUDF(); boolean isAnd = udf instanceof GenericUDFOPAnd; boolean isOr = udf instanceof GenericUDFOPOr; - + if (isAnd || isOr) { List children = expr.getChildren(); - ExprNodeDesc left = children.get(0); - children.set(0, compactExpr(left)); - ExprNodeDesc right = children.get(1); - children.set(1, compactExpr(right)); - - if (isTrueExpr(children.get(0)) && isTrueExpr(children.get(1))) { - return new ExprNodeConstantDesc(Boolean.TRUE); - } else if (isTrueExpr(children.get(0))) { - return isAnd ? children.get(1) : new ExprNodeConstantDesc(Boolean.TRUE); - } else if (isTrueExpr(children.get(1))) { - return isAnd ? children.get(0) : new ExprNodeConstantDesc(Boolean.TRUE); - } else if (isFalseExpr(children.get(0)) && isFalseExpr(children.get(1))) { - return new ExprNodeConstantDesc(Boolean.FALSE); - } else if (isFalseExpr(children.get(0))) { - return isAnd ? new ExprNodeConstantDesc(Boolean.FALSE) : children.get(1); - } else if (isFalseExpr(children.get(1))) { - return isAnd ? new ExprNodeConstantDesc(Boolean.FALSE) : children.get(0); - } - + ExprNodeDesc left = compactExpr(children.get(0)); + ExprNodeDesc right = compactExpr(children.get(1)); + // Non-partition expressions are converted to nulls. + if (left == null && right == null) { + return null; + } else if (left == null) { + return isAnd ? right : null; + } else if (right == null) { + return isAnd ? left : null; + } + // Handle boolean expressions + boolean isLeftFalse = isFalseExpr(left), isRightFalse = isFalseExpr(right), + isLeftTrue = isTrueExpr(left), isRightTrue = isTrueExpr(right); + if ((isRightTrue && isLeftTrue) || (isOr && (isLeftTrue || isRightTrue))) { + return new ExprNodeConstantDesc(Boolean.TRUE); + } else if ((isRightFalse && isLeftFalse) || (isAnd && (isLeftFalse || isRightFalse))) { + return new ExprNodeConstantDesc(Boolean.FALSE); + } else if ((isAnd && isLeftTrue) || (isOr && isLeftFalse)) { + return right; + } else if ((isAnd && isRightTrue) || (isOr && isRightFalse)) { + return left; + } + // Nothing to compact, update expr with compacted children. + children.set(0, left); + children.set(1, right); } return expr; } else { @@ -328,9 +329,9 @@ if (!partCols.contains(column)) { // Column doesn't appear to be a partition column for the table. 
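The rewritten compactExpr above prunes children that refer only to non-partition columns (they come back as null, as do constant NULL literals) and then folds AND/OR over the surviving boolean constants. The standalone sketch below only models that truth table, using java.lang.Boolean with null standing in for a pruned child; the class and method names are invented for illustration, and real children can also be ordinary partition-column predicates, which the patch simply leaves in place.

    // Simplified model of the AND/OR folding used when compacting a partition
    // pruning predicate: null means "child referred only to non-partition
    // columns (or was a constant NULL) and was dropped".
    public class CompactExprFoldingSketch {

      static Boolean fold(Boolean left, Boolean right, boolean isAnd) {
        // Both children pruned: the whole node is pruned.
        if (left == null && right == null) {
          return null;
        }
        // One child pruned: AND keeps the remaining child; OR is pruned entirely,
        // because an unknown disjunct could make the predicate true for any partition.
        if (left == null) {
          return isAnd ? right : null;
        }
        if (right == null) {
          return isAnd ? left : null;
        }
        // Constant folding on the remaining boolean children.
        return isAnd ? (left && right) : (left || right);
      }

      public static void main(String[] args) {
        System.out.println(fold(null, Boolean.TRUE, true));          // true: AND keeps the partition side
        System.out.println(fold(null, Boolean.FALSE, false));        // null: OR with a pruned child is dropped
        System.out.println(fold(Boolean.FALSE, Boolean.TRUE, true)); // false: no partitions survive
      }
    }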
return new ExprNodeConstantDesc(expr.getTypeInfo(), null); - } + } referred.add(column); - } + } if (expr instanceof ExprNodeGenericFuncDesc) { List children = expr.getChildren(); for (int i = 0; i < children.size(); ++i) { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java (working copy) @@ -103,7 +103,7 @@ } // we can set the traits for this join operator - OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null, joinOp.getOpTraits().getNumReduceSinks()); + OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null); mapJoinOp.setOpTraits(opTraits); mapJoinOp.setStatistics(joinOp.getStatistics()); setNumberOfBucketsOnChildren(mapJoinOp); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTablePartMergeFilesDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTablePartMergeFilesDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTablePartMergeFilesDesc.java (working copy) @@ -26,8 +26,10 @@ import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName = "Alter Table Partition Merge Files") + +@Explain(displayName = "Alter Table Partition Merge Files", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class AlterTablePartMergeFilesDesc { private String tableName; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java (working copy) @@ -69,6 +69,7 @@ import org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule; import org.apache.calcite.rel.rules.JoinToMultiJoinRule; import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; +import org.apache.calcite.rel.rules.ProjectMergeRule; import org.apache.calcite.rel.rules.ProjectRemoveRule; import org.apache.calcite.rel.rules.ReduceExpressionsRule; import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; @@ -120,6 +121,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; @@ -248,13 +250,29 @@ disableJoinMerge = false; sinkOp = genPlan(getQB()); LOG.info("CBO Succeeded; optimized logical plan."); + this.ctx.setCboInfo("Plan optimized by CBO."); LOG.debug(newAST.dump()); } catch (Exception e) { boolean isMissingStats = noColsMissingStats.get() > 0; if (isMissingStats) { LOG.error("CBO failed due to missing column stats (see previous errors), skipping CBO"); + this.ctx + .setCboInfo("Plan not optimized by CBO due to missing statistics. 
Please check log for more details."); } else { LOG.error("CBO failed, skipping CBO. ", e); + if (e instanceof CalciteSemanticException) { + CalciteSemanticException calciteSemanticException = (CalciteSemanticException) e; + UnsupportedFeature unsupportedFeature = calciteSemanticException + .getUnsupportedFeature(); + if (unsupportedFeature != null) { + this.ctx.setCboInfo("Plan not optimized by CBO due to missing feature [" + + unsupportedFeature + "]."); + } else { + this.ctx.setCboInfo("Plan not optimized by CBO."); + } + } else { + this.ctx.setCboInfo("Plan not optimized by CBO."); + } } if (!conf.getBoolVar(ConfVars.HIVE_IN_TEST) || isMissingStats || e instanceof CalciteSemanticException) { @@ -279,6 +297,7 @@ } } } else { + this.ctx.setCboInfo("Plan not optimized by CBO."); skipCalcitePlan = true; } } @@ -721,6 +740,7 @@ hepPgmBldr.addRuleInstance(ReduceExpressionsRule.PROJECT_INSTANCE); hepPgmBldr.addRuleInstance(ProjectRemoveRule.INSTANCE); hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE); + hepPgmBldr.addRuleInstance(new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY)); hepPgm = hepPgmBldr.build(); HepPlanner hepPlanner = new HepPlanner(hepPgm); @@ -785,8 +805,6 @@ // 3. Transitive inference & Partition Pruning basePlan = hepPlan(basePlan, false, mdProvider, new JoinPushTransitivePredicatesRule( Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), - // TODO: Enable it after CALCITE-407 is fixed - // RemoveTrivialProjectRule.INSTANCE, new HivePartitionPruneRule(conf)); // 4. Projection Pruning @@ -1081,7 +1099,7 @@ String msg = String.format("UNIQUE JOIN is currently not supported in CBO," + " turn off cbo to use UNIQUE JOIN."); LOG.debug(msg); - throw new CalciteSemanticException(msg); + throw new CalciteSemanticException(msg, UnsupportedFeature.Unique_join); } // 1. Determine Join Type @@ -1164,7 +1182,7 @@ + " Currently we don't support Table Sample clauses in CBO," + " turn off cbo for queries on tableSamples.", tableAlias); LOG.debug(msg); - throw new CalciteSemanticException(msg); + throw new CalciteSemanticException(msg, UnsupportedFeature.Table_sample_clauses); } // 2. Get Table Metadata @@ -1261,7 +1279,8 @@ // fail on compile time // for such queries, its an arcane corner case, not worth of adding that // complexity. 
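The catch block above records why CBO was skipped so that the new user-level explain can report it. A compact sketch of that fallback-message logic is below; the helper method and class are hypothetical and exist only for illustration, while CalciteSemanticException, getUnsupportedFeature() and UnsupportedFeature are the types used in the hunk above.

    import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
    import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;

    public class CboInfoSketch {
      // Map a CBO failure to the message stored via ctx.setCboInfo(...).
      static String cboInfoFor(Exception e) {
        if (e instanceof CalciteSemanticException) {
          UnsupportedFeature feature = ((CalciteSemanticException) e).getUnsupportedFeature();
          if (feature != null) {
            return "Plan not optimized by CBO due to missing feature [" + feature + "].";
          }
        }
        return "Plan not optimized by CBO.";
      }
    }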
- throw new CalciteSemanticException("Filter expression with non-boolean return type."); + throw new CalciteSemanticException("Filter expression with non-boolean return type.", + UnsupportedFeature.Filter_expression_with_non_boolean_return_type); } ImmutableMap hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap .get(srcRel); @@ -1772,7 +1791,8 @@ grpbyExpr, new TypeCheckCtx(groupByInputRowResolver)); ExprNodeDesc grpbyExprNDesc = astToExprNDescMap.get(grpbyExpr); if (grpbyExprNDesc == null) - throw new CalciteSemanticException("Invalid Column Reference: " + grpbyExpr.dump()); + throw new CalciteSemanticException("Invalid Column Reference: " + grpbyExpr.dump(), + UnsupportedFeature.Invalid_column_reference); addToGBExpr(groupByOutputRowResolver, groupByInputRowResolver, grpbyExpr, grpbyExprNDesc, gbExprNDescLst, outputColumnNames); @@ -1960,7 +1980,8 @@ RowResolver obSyntheticProjectRR = new RowResolver(); if (!RowResolver.add(obSyntheticProjectRR, inputRR)) { throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); + "Duplicates detected when adding columns to RR: see previous message", + UnsupportedFeature.Duplicates_in_RR); } int vcolPos = inputRR.getRowSchema().getSignature().size(); for (Pair astTypePair : vcASTTypePairs) { @@ -1975,20 +1996,23 @@ if (outermostOB) { if (!RowResolver.add(outputRR, inputRR)) { throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); + "Duplicates detected when adding columns to RR: see previous message", + UnsupportedFeature.Duplicates_in_RR); } } else { if (!RowResolver.add(outputRR, obSyntheticProjectRR)) { throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); + "Duplicates detected when adding columns to RR: see previous message", + UnsupportedFeature.Duplicates_in_RR); } originalOBChild = srcRel; } } else { if (!RowResolver.add(outputRR, inputRR)) { throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); + "Duplicates detected when adding columns to RR: see previous message", + UnsupportedFeature.Duplicates_in_RR); } } @@ -2026,7 +2050,8 @@ RowResolver outputRR = new RowResolver(); if (!RowResolver.add(outputRR, relToHiveRR.get(srcRel))) { throw new CalciteSemanticException( - "Duplicates detected when adding columns to RR: see previous message"); + "Duplicates detected when adding columns to RR: see previous message", + UnsupportedFeature.Duplicates_in_RR); } ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap( outputRR, sortRel); @@ -2317,7 +2342,7 @@ String msg = String.format("Hint specified for %s." + " Currently we don't support hints in CBO, turn off cbo to use hints.", hint); LOG.debug(msg); - throw new CalciteSemanticException(msg); + throw new CalciteSemanticException(msg, UnsupportedFeature.Hint); } // 4. Bailout if select involves Transform @@ -2326,7 +2351,7 @@ String msg = String.format("SELECT TRANSFORM is currently not supported in CBO," + " turn off cbo to use TRANSFORM."); LOG.debug(msg); - throw new CalciteSemanticException(msg); + throw new CalciteSemanticException(msg, UnsupportedFeature.Select_transform); } // 5. 
Bailout if select involves UDTF @@ -2339,7 +2364,7 @@ String msg = String.format("UDTF " + funcName + " is currently not supported in CBO," + " turn off cbo to use UDTF " + funcName); LOG.debug(msg); - throw new CalciteSemanticException(msg); + throw new CalciteSemanticException(msg, UnsupportedFeature.UDTF); } } @@ -2408,7 +2433,8 @@ } else if (expr.toStringTree().contains("TOK_FUNCTIONDI") && !(srcRel instanceof HiveAggregate)) { // Likely a malformed query eg, select hash(distinct c1) from t1; - throw new CalciteSemanticException("Distinct without an aggreggation."); + throw new CalciteSemanticException("Distinct without an aggreggation.", + UnsupportedFeature.Distinct_without_an_aggreggation); } else { // Case when this is an expression TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); @@ -2427,7 +2453,8 @@ .isSkewedCol() : false); if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) { throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "." - + colAlias + " => " + colInfo + " due to duplication, see previous warnings"); + + colAlias + " => " + colInfo + " due to duplication, see previous warnings", + UnsupportedFeature.Duplicates_in_RR); } if (exp instanceof ExprNodeColumnDesc) { @@ -2491,7 +2518,7 @@ if (LOG.isDebugEnabled()) { LOG.debug(msg + " because it: " + reason); } - throw new CalciteSemanticException(msg); + throw new CalciteSemanticException(msg, UnsupportedFeature.Subquery); } // 1. Build Rel For Src (SubQuery, TS, Join) @@ -2520,7 +2547,7 @@ // table // So, for now lets just disable this. Anyway there is nothing much to // optimize in such cases. - throw new CalciteSemanticException("Unsupported"); + throw new CalciteSemanticException("Unsupported", UnsupportedFeature.Others); } // 1.3 process join @@ -2628,7 +2655,8 @@ if (havingClause != null) { if (!(srcRel instanceof HiveAggregate)) { // ill-formed query like select * from t1 having c1 > 0; - throw new CalciteSemanticException("Having clause without any group-by."); + throw new CalciteSemanticException("Having clause without any group-by.", + UnsupportedFeature.Having_clause_without_any_groupby); } validateNoHavingReferenceToAlias(qb, (ASTNode) havingClause.getChild(0)); gbFilter = genFilterRelNode(qb, (ASTNode) havingClause.getChild(0), srcRel, aliasToRel, @@ -2690,7 +2718,7 @@ + " This non standard behavior is not supported with cbo on." + " Turn off cbo for these queries.", aliasToCheck, havingClause); LOG.debug(msg); - throw new CalciteSemanticException(msg); + throw new CalciteSemanticException(msg, UnsupportedFeature.Select_alias_in_having_clause); } } @@ -2726,7 +2754,7 @@ String msg = String.format("Multi Insert is currently not supported in CBO," + " turn off cbo to use Multi Insert."); LOG.debug(msg); - throw new CalciteSemanticException(msg); + throw new CalciteSemanticException(msg, UnsupportedFeature.Multi_insert); } return qbp; } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (working copy) @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.metadata.InvalidTableException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.serde.serdeConstants; /** * ColumnStatsSemanticAnalyzer. 
@@ -186,15 +187,7 @@ } else { whereClause.append(" and "); } - whereClause.append(partKey); - whereClause.append(" = "); - if (getColTypeOf(partKey).equalsIgnoreCase("string")) { - whereClause.append("'"); - } - whereClause.append(value); - if (getColTypeOf(partKey).equalsIgnoreCase("string")) { - whereClause.append("'"); - } + whereClause.append(partKey).append(" = ").append(genPartValueString(partKey, value)); } } @@ -211,11 +204,39 @@ return predPresent ? whereClause.append(groupByClause) : groupByClause; } + private String genPartValueString (String partKey, String partVal) throws SemanticException { + String returnVal = partVal; + String partColType = getColTypeOf(partKey); + if (partColType.equals(serdeConstants.STRING_TYPE_NAME) || + partColType.contains(serdeConstants.VARCHAR_TYPE_NAME) || + partColType.contains(serdeConstants.CHAR_TYPE_NAME)) { + returnVal = "'" + partVal + "'"; + } else if (partColType.equals(serdeConstants.TINYINT_TYPE_NAME)) { + returnVal = partVal+"Y"; + } else if (partColType.equals(serdeConstants.SMALLINT_TYPE_NAME)) { + returnVal = partVal+"S"; + } else if (partColType.equals(serdeConstants.INT_TYPE_NAME)) { + returnVal = partVal; + } else if (partColType.equals(serdeConstants.BIGINT_TYPE_NAME)) { + returnVal = partVal+"L"; + } else if (partColType.contains(serdeConstants.DECIMAL_TYPE_NAME)) { + returnVal = partVal + "BD"; + } else if (partColType.equals(serdeConstants.DATE_TYPE_NAME) || + partColType.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { + returnVal = partColType + " '" + partVal + "'"; + } else { + //for other usually not used types, just quote the value + returnVal = "'" + partVal + "'"; + } + + return returnVal; + } + private String getColTypeOf (String partKey) throws SemanticException{ for (FieldSchema fs : tbl.getPartitionKeys()) { if (partKey.equalsIgnoreCase(fs.getName())) { - return fs.getType(); + return fs.getType().toLowerCase(); } } throw new SemanticException ("Unknown partition key : " + partKey); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (working copy) @@ -186,6 +186,8 @@ TokenToTypeName.put(HiveParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_DATETIME, serdeConstants.DATETIME_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME); + TokenToTypeName.put(HiveParser.TOK_INTERVAL_YEAR_MONTH, serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME); + TokenToTypeName.put(HiveParser.TOK_INTERVAL_DAY_TIME, serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_DECIMAL, serdeConstants.DECIMAL_TYPE_NAME); } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java (working copy) @@ -91,6 +91,8 @@ pCtx = ((SemanticAnalyzer)sem).getParseContext(); } + boolean userLevelExplain = !extended && !formatted && !dependency && !logical && !authorize + && HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_EXPLAIN_USER); ExplainWork work = new ExplainWork(ctx.getResFile(), pCtx, tasks, @@ -101,7 +103,9 @@ formatted, dependency, logical, - 
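The genPartValueString helper added above renders a partition value as a typed literal instead of always emitting it bare or string-quoted, so the generated WHERE clause parses for non-string partition columns (for example, a smallint partition becomes 7S and a date partition becomes date '2015-04-01'). The standalone sketch below mirrors that mapping with the column type passed in directly rather than looked up from table metadata; the class and method names are invented for the example, while the serdeConstants names are the ones imported in the hunk above.

    import org.apache.hadoop.hive.serde.serdeConstants;

    public class PartValueLiteralSketch {
      // partColType is the lower-cased partition column type name.
      static String toLiteral(String partColType, String partVal) {
        if (partColType.equals(serdeConstants.STRING_TYPE_NAME)
            || partColType.contains(serdeConstants.VARCHAR_TYPE_NAME)
            || partColType.contains(serdeConstants.CHAR_TYPE_NAME)) {
          return "'" + partVal + "'";
        } else if (partColType.equals(serdeConstants.TINYINT_TYPE_NAME)) {
          return partVal + "Y";
        } else if (partColType.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
          return partVal + "S";
        } else if (partColType.equals(serdeConstants.INT_TYPE_NAME)) {
          return partVal;
        } else if (partColType.equals(serdeConstants.BIGINT_TYPE_NAME)) {
          return partVal + "L";
        } else if (partColType.contains(serdeConstants.DECIMAL_TYPE_NAME)) {
          return partVal + "BD";
        } else if (partColType.equals(serdeConstants.DATE_TYPE_NAME)
            || partColType.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
          return partColType + " '" + partVal + "'";
        }
        // Other, rarely used types: fall back to quoting the value.
        return "'" + partVal + "'";
      }

      public static void main(String[] args) {
        System.out.println(toLiteral("smallint", "7"));       // 7S
        System.out.println(toLiteral("date", "2015-04-01"));  // date '2015-04-01'
        System.out.println(toLiteral("string", "us"));        // 'us'
      }
    }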
authorize); + authorize, + userLevelExplain, + ctx.getCboInfo()); work.setAppendTaskType( HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES)); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java (working copy) @@ -105,7 +105,7 @@ float minPartitionFactor = context.conf.getFloatVar(HiveConf.ConfVars.TEZ_MIN_PARTITION_FACTOR); long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER); - ReduceWork reduceWork = new ReduceWork("Reducer "+ (++sequenceNumber)); + ReduceWork reduceWork = new ReduceWork(Utilities.REDUCENAME + (++sequenceNumber)); LOG.debug("Adding reduce work (" + reduceWork.getName() + ") for " + root); reduceWork.setReducer(root); reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork)); @@ -180,7 +180,7 @@ public MapWork createMapWork(GenTezProcContext context, Operator root, TezWork tezWork, PrunedPartitionList partitions) throws SemanticException { assert root.getParentOperators().isEmpty(); - MapWork mapWork = new MapWork("Map "+ (++sequenceNumber)); + MapWork mapWork = new MapWork(Utilities.MAPNAME + (++sequenceNumber)); LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root); // map work starts with table scan operators Index: ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java (working copy) @@ -166,7 +166,7 @@ } // connect the work correctly. 
work.addSortCols(root.getOpTraits().getSortCols().get(0)); - mergeJoinWork.addMergedWork(work, null); + mergeJoinWork.addMergedWork(work, null, context.leafOperatorToFollowingWork); Operator parentOp = getParentFromStack(context.currentMergeJoinOperator, stack); int pos = context.currentMergeJoinOperator.getTagForOperator(parentOp); @@ -268,6 +268,7 @@ if (LOG.isDebugEnabled()) { LOG.debug("Removing " + parent + " as parent from " + root); } + context.leafOperatorToFollowingWork.remove(parent); context.leafOperatorToFollowingWork.put(parent, work); root.removeParent(parent); } @@ -326,7 +327,7 @@ MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork; CommonMergeJoinOperator mergeJoinOp = mergeJoinWork.getMergeJoinOperator(); work.setTag(mergeJoinOp.getTagForOperator(operator)); - mergeJoinWork.addMergedWork(null, work); + mergeJoinWork.addMergedWork(null, work, context.leafOperatorToFollowingWork); tezWork.setVertexType(mergeJoinWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES); for (BaseWork parentWork : tezWork.getParents(work)) { TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work); @@ -399,7 +400,7 @@ return null; } - private int getFollowingWorkIndex(TezWork tezWork, UnionWork unionWork, ReduceSinkOperator rs) + private int getFollowingWorkIndex(TezWork tezWork, UnionWork unionWork, ReduceSinkOperator rs) throws SemanticException { int index = 0; for (BaseWork baseWork : tezWork.getChildren(unionWork)) { Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -115,6 +115,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.Optimizer; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType; import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher; @@ -1577,13 +1578,16 @@ // Disallow INSERT INTO on bucketized tables boolean isAcid = isAcidTable(tab); - if (qb.getParseInfo().isInsertIntoTable(tab.getDbName(), tab.getTableName()) && + boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(tab.getDbName(), tab.getTableName()); + if (isTableWrittenTo && tab.getNumBuckets() > 0 && !isAcid) { throw new SemanticException(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE. getMsg("Table: " + tab_name)); } // Disallow update and delete on non-acid tables - if ((updating() || deleting()) && !isAcid) { + if ((updating() || deleting()) && !isAcid && isTableWrittenTo) { + //isTableWrittenTo: delete from acidTbl where a in (select id from nonAcidTable) + //so only assert this if we are actually writing to this table // isAcidTable above also checks for whether we are using an acid compliant // transaction manager. But that has already been caught in // UpdateDeleteSemanticAnalyzer, so if we are updating or deleting and getting nonAcid @@ -2992,7 +2996,8 @@ if (ensureUniqueCols) { if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) { throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." 
+ tmp[1] - + " => " + oColInfo + " due to duplication, see previous warnings"); + + " => " + oColInfo + " due to duplication, see previous warnings", + UnsupportedFeature.Duplicates_in_RR); } } else { output.put(tmp[0], tmp[1], oColInfo); @@ -11016,9 +11021,10 @@ // Process the position alias in GROUPBY and ORDERBY private void processPositionAlias(ASTNode ast) throws SemanticException { + boolean isByPos = false; if (HiveConf.getBoolVar(conf, - HiveConf.ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS) == false) { - return; + HiveConf.ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS) == true) { + isByPos = true; } if (ast.getChildCount() == 0) { @@ -11052,15 +11058,20 @@ for (int child_pos = 0; child_pos < groupbyNode.getChildCount(); ++child_pos) { ASTNode node = (ASTNode) groupbyNode.getChild(child_pos); if (node.getToken().getType() == HiveParser.Number) { - int pos = Integer.parseInt(node.getText()); - if (pos > 0 && pos <= selectExpCnt) { - groupbyNode.setChild(child_pos, - selectNode.getChild(pos - 1).getChild(0)); + if (isByPos) { + int pos = Integer.parseInt(node.getText()); + if (pos > 0 && pos <= selectExpCnt) { + groupbyNode.setChild(child_pos, + selectNode.getChild(pos - 1).getChild(0)); + } else { + throw new SemanticException( + ErrorMsg.INVALID_POSITION_ALIAS_IN_GROUPBY.getMsg( + "Position alias: " + pos + " does not exist\n" + + "The Select List is indexed from 1 to " + selectExpCnt)); + } } else { - throw new SemanticException( - ErrorMsg.INVALID_POSITION_ALIAS_IN_GROUPBY.getMsg( - "Position alias: " + pos + " does not exist\n" + - "The Select List is indexed from 1 to " + selectExpCnt)); + warn("Using constant number " + node.getText() + + " in group by. If you try to use position alias when hive.groupby.orderby.position.alias is false, the position alias will be ignored."); } } } @@ -11079,19 +11090,24 @@ ASTNode colNode = (ASTNode) orderbyNode.getChild(child_pos); ASTNode node = (ASTNode) colNode.getChild(0); if (node.getToken().getType() == HiveParser.Number) { - if (!isAllCol) { - int pos = Integer.parseInt(node.getText()); - if (pos > 0 && pos <= selectExpCnt) { - colNode.setChild(0, selectNode.getChild(pos - 1).getChild(0)); + if( isByPos ) { + if (!isAllCol) { + int pos = Integer.parseInt(node.getText()); + if (pos > 0 && pos <= selectExpCnt) { + colNode.setChild(0, selectNode.getChild(pos - 1).getChild(0)); + } else { + throw new SemanticException( + ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg( + "Position alias: " + pos + " does not exist\n" + + "The Select List is indexed from 1 to " + selectExpCnt)); + } } else { throw new SemanticException( - ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg( - "Position alias: " + pos + " does not exist\n" + - "The Select List is indexed from 1 to " + selectExpCnt)); + ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg()); } - } else { - throw new SemanticException( - ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg()); + } else { //if not using position alias and it is a number. + warn("Using constant number " + node.getText() + + " in order by. 
If you try to use position alias when hive.groupby.orderby.position.alias is false, the position alias will be ignored."); } } } @@ -12092,4 +12108,8 @@ queryProperties.setOuterQueryLimit(qb.getParseInfo().getOuterQueryLimit()); } } + private void warn(String msg) { + SessionState.getConsole().printInfo( + String.format("Warning: %s", msg)); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java (working copy) @@ -140,6 +140,7 @@ } List partCols = mTable.getPartCols(); + List bucketingCols = mTable.getBucketCols(); rewrittenQueryStr.append("insert into table "); rewrittenQueryStr.append(getDotName(tableName)); @@ -199,7 +200,10 @@ } } } - + //updating bucket column should move row from one file to another - not supported + if(bucketingCols != null && bucketingCols.contains(columnName)) { + throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_BUCKET_VALUE,columnName); + } // This means that in UPDATE T SET x = _something_ // _something_ can be whatever is supported in SELECT _something_ setCols.put(columnName, (ASTNode)assignment.getChildren().get(1)); Index: ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java (working copy) @@ -22,6 +22,7 @@ import java.util.Map; import org.apache.hadoop.hive.ql.exec.PTFUtils; +import org.apache.hadoop.hive.ql.plan.Explain.Level; public class AbstractOperatorDesc implements OperatorDesc { @@ -33,9 +34,9 @@ static { PTFUtils.makeTransient(AbstractOperatorDesc.class, "opProps"); } - + @Override - @Explain(skipHeader = true, displayName = "Statistics") + @Explain(skipHeader = true, displayName = "Statistics", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Statistics getStatistics() { return statistics; } @@ -50,14 +51,18 @@ throw new CloneNotSupportedException("clone not supported"); } + public boolean getVectorMode() { + return vectorMode; + } + public void setVectorMode(boolean vm) { this.vectorMode = vm; } - + public OpTraits getTraits() { return opTraits; } - + public void setTraits(OpTraits opTraits) { this.opTraits = opTraits; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/AlterDatabaseDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterDatabaseDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterDatabaseDesc.java (working copy) @@ -21,11 +21,13 @@ import java.io.Serializable; import java.util.Map; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * AlterDatabaseDesc. 
* */ -@Explain(displayName = "Create Database") +@Explain(displayName = "Create Database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class AlterDatabaseDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -68,7 +70,7 @@ this.dbProperties = dbProps; } - @Explain(displayName="name") + @Explain(displayName="name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getDatabaseName() { return databaseName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java (working copy) @@ -21,11 +21,13 @@ import java.io.Serializable; import java.util.Map; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * AlterIndexDesc. * */ -@Explain(displayName = "Alter Index") +@Explain(displayName = "Alter Index", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class AlterIndexDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; private String indexName; @@ -53,7 +55,7 @@ /** * @return the name of the index */ - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getIndexName() { return indexName; } @@ -69,7 +71,7 @@ /** * @return the baseTable */ - @Explain(displayName = "new name") + @Explain(displayName = "new name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getBaseTableName() { return baseTable; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java (working copy) @@ -32,12 +32,13 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * AlterTableDesc. 
* */ -@Explain(displayName = "Alter Table") +@Explain(displayName = "Alter Table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class AlterTableDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -261,12 +262,12 @@ this.numberBuckets = numBuckets; } - @Explain(displayName = "new columns") + @Explain(displayName = "new columns", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getNewColsString() { return Utilities.getFieldSchemaString(getNewCols()); } - @Explain(displayName = "type") + @Explain(displayName = "type", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getAlterTableTypeString() { return op.getName(); } @@ -274,7 +275,7 @@ /** * @return the old name of the table */ - @Explain(displayName = "old name") + @Explain(displayName = "old name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getOldName() { return oldName; } @@ -290,7 +291,7 @@ /** * @return the newName */ - @Explain(displayName = "new name") + @Explain(displayName = "new name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getNewName() { return newName; } @@ -368,7 +369,7 @@ /** * @return the input format */ - @Explain(displayName = "input format") + @Explain(displayName = "input format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getInputFormat() { return inputFormat; } @@ -384,7 +385,7 @@ /** * @return the output format */ - @Explain(displayName = "output format") + @Explain(displayName = "output format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getOutputFormat() { return outputFormat; } @@ -400,7 +401,7 @@ /** * @return the storage handler */ - @Explain(displayName = "storage handler") + @Explain(displayName = "storage handler", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getStorageHandler() { return storageHandler; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java (working copy) @@ -19,12 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ArchiveWork. * */ -@Explain(displayName = "Map Reduce") +@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ArchiveWork implements Serializable { private static final long serialVersionUID = 1L; private ArchiveActionType type; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (working copy) @@ -29,12 +29,14 @@ import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * BaseWork. Base class for any "work" that's being done on the cluster. Items like stats * gathering that are commonly used regardless of the type of work live here. 
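The remaining plan-descriptor hunks apply one mechanical change: each @Explain annotation gains an explainLevels attribute so the class or getter appears in the new user-level explain (Level.USER) as well as the default and extended forms, while entries that previously used normalExplain = false become Level.EXTENDED only. A minimal hypothetical descriptor written against that convention might look like the sketch below; the class, fields, and display names are invented, and only the Explain annotation and Level enum come from the code above.

    import java.io.Serializable;

    import org.apache.hadoop.hive.ql.plan.Explain;
    import org.apache.hadoop.hive.ql.plan.Explain.Level;

    @Explain(displayName = "Example Operator",
        explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
    public class ExampleDesc implements Serializable {
      private static final long serialVersionUID = 1L;
      private String name;

      // Shown in user-level explain as well as the default and extended forms.
      @Explain(displayName = "name",
          explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
      public String getName() {
        return name;
      }

      // Restricted to extended output, the replacement for normalExplain = false.
      @Explain(displayName = "internal detail", explainLevels = { Level.EXTENDED })
      public String getInternalDetail() {
        return "detail";
      }
    }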
*/ -@SuppressWarnings({"serial", "deprecation"}) +@SuppressWarnings({"serial"}) public abstract class BaseWork extends AbstractOperatorDesc { // dummyOps is a reference to all the HashTableDummy operators in the @@ -58,9 +60,9 @@ private String name; // Vectorization. - protected Map> allScratchColumnVectorTypeMaps = null; - protected Map> allColumnVectorMaps = null; - protected boolean vectorMode = false; + protected Map vectorColumnNameMap; + protected Map vectorColumnTypeMap; + protected Map vectorScratchColumnTypeMap; public void setGatheringStats(boolean gatherStats) { this.gatheringStats = gatherStats; @@ -142,27 +144,34 @@ return returnSet; } - public Map> getAllScratchColumnVectorTypeMaps() { - return allScratchColumnVectorTypeMaps; + public Map getVectorColumnNameMap() { + return vectorColumnNameMap; } - public void setAllScratchColumnVectorTypeMaps( - Map> allScratchColumnVectorTypeMaps) { - this.allScratchColumnVectorTypeMaps = allScratchColumnVectorTypeMaps; + public void setVectorColumnNameMap(Map vectorColumnNameMap) { + this.vectorColumnNameMap = vectorColumnNameMap; } - public Map> getAllColumnVectorMaps() { - return allColumnVectorMaps; + public Map getVectorColumnTypeMap() { + return vectorColumnTypeMap; } - public void setAllColumnVectorMaps(Map> allColumnVectorMaps) { - this.allColumnVectorMaps = allColumnVectorMaps; + public void setVectorColumnTypeMap(Map vectorColumnTypeMap) { + this.vectorColumnTypeMap = vectorColumnTypeMap; } + public Map getVectorScratchColumnTypeMap() { + return vectorScratchColumnTypeMap; + } + + public void setVectorScratchColumnTypeMap(Map vectorScratchColumnTypeMap) { + this.vectorScratchColumnTypeMap = vectorScratchColumnTypeMap; + } + /** * @return the mapredLocalWork */ - @Explain(displayName = "Local Work") + @Explain(displayName = "Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public MapredLocalWork getMapRedLocalWork() { return mrLocalWork; } @@ -175,15 +184,6 @@ this.mrLocalWork = mapLocalWork; } - @Override - public void setVectorMode(boolean vectorMode) { - this.vectorMode = vectorMode; - } - - public boolean getVectorMode() { - return vectorMode; - } - public abstract void configureJobConf(JobConf job); public void setTag(int tag) { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/BucketMapJoinContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/BucketMapJoinContext.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BucketMapJoinContext.java (working copy) @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.ql.exec.BucketMatcher; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * was inner class of MapreLocalWork. 
context for bucket mapjoin (or smb join) @@ -130,7 +131,7 @@ this.bucketMatcherClass = bucketMatcherClass; } - @Explain(displayName = "Alias Bucket File Name Mapping", normalExplain = false) + @Explain(displayName = "Alias Bucket File Name Mapping", explainLevels = { Level.EXTENDED }) public Map>> getAliasBucketFileNameMapping() { return aliasBucketFileNameMapping; } @@ -149,7 +150,7 @@ } } - @Explain(displayName = "Alias Bucket Base File Name Mapping", normalExplain = false) + @Explain(displayName = "Alias Bucket Base File Name Mapping", explainLevels = { Level.EXTENDED }) public Map>> getAliasBucketBaseFileNameMapping() { return aliasBucketBaseFileNameMapping; } @@ -159,7 +160,7 @@ this.aliasBucketBaseFileNameMapping = aliasBucketBaseFileNameMapping; } - @Explain(displayName = "Alias Bucket Output File Name Mapping", normalExplain = false) + @Explain(displayName = "Alias Bucket Output File Name Mapping", explainLevels = { Level.EXTENDED }) public Map getBucketFileNameMapping() { return bucketFileNameMapping; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/CollectDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/CollectDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CollectDesc.java (working copy) @@ -17,13 +17,15 @@ */ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * CollectDesc. * */ -@Explain(displayName = "Collect") +@Explain(displayName = "Collect", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class CollectDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; Integer bufferSize; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java (working copy) @@ -20,6 +20,8 @@ import java.io.Serializable; import java.util.List; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * Contains the information needed to persist column level statistics */ @@ -51,7 +53,7 @@ this.tableName = tableName; } - @Explain(displayName = "Is Table Level Stats", normalExplain=false) + @Explain(displayName = "Is Table Level Stats", explainLevels = { Level.EXTENDED }) public boolean isTblLevel() { return isTblLevel; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java (working copy) @@ -21,7 +21,9 @@ import java.io.Serializable; import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ColumnStatsUpdateWork implementation. ColumnStatsUpdateWork will persist the * colStats into metastore. 
Work corresponds to statement like ALTER TABLE @@ -30,7 +32,7 @@ * PARTITION(partitionId=100) UPDATE STATISTICS for column value SET * ('maxColLen'='4444','avgColLen'='44.4'); */ -@Explain(displayName = "Column Stats Update Work") +@Explain(displayName = "Column Stats Update Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ColumnStatsUpdateWork implements Serializable { private static final long serialVersionUID = 1L; private ColumnStatsDesc colStats; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java (working copy) @@ -21,12 +21,14 @@ import java.io.Serializable; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ColumnStats Work. * */ -@Explain(displayName = "Column Stats Work") +@Explain(displayName = "Column Stats Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ColumnStatsWork implements Serializable { private static final long serialVersionUID = 1L; private FetchWork fWork; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java (working copy) @@ -19,8 +19,10 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName = "Merge Join Operator") + +@Explain(displayName = "Merge Join Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class CommonMergeJoinDesc extends MapJoinDesc implements Serializable { private static final long serialVersionUID = 1L; private int numBuckets; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/CopyWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/CopyWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CopyWork.java (working copy) @@ -21,12 +21,13 @@ import java.io.Serializable; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * CopyWork. 
* */ -@Explain(displayName = "Copy") +@Explain(displayName = "Copy", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class CopyWork implements Serializable { private static final long serialVersionUID = 1L; private Path fromPath; @@ -46,12 +47,12 @@ this.setErrorOnSrcEmpty(errorOnSrcEmpty); } - @Explain(displayName = "source") + @Explain(displayName = "source", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Path getFromPath() { return fromPath; } - @Explain(displayName = "destination") + @Explain(displayName = "destination", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Path getToPath() { return toPath; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/CreateDatabaseDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateDatabaseDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateDatabaseDesc.java (working copy) @@ -21,11 +21,13 @@ import java.io.Serializable; import java.util.Map; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * CreateDatabaseDesc. * */ -@Explain(displayName = "Create Database") +@Explain(displayName = "Create Database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class CreateDatabaseDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -75,7 +77,7 @@ this.dbProperties = dbProps; } - @Explain(displayName="name") + @Explain(displayName="name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getName() { return databaseName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/CreateFunctionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateFunctionDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateFunctionDesc.java (working copy) @@ -22,12 +22,13 @@ import java.util.List; import org.apache.hadoop.hive.metastore.api.ResourceUri; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * CreateFunctionDesc. * */ -@Explain(displayName = "Create Function") +@Explain(displayName = "Create Function", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class CreateFunctionDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -50,7 +51,7 @@ this.resources = resources; } - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getFunctionName() { return functionName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/CreateMacroDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateMacroDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateMacroDesc.java (working copy) @@ -21,13 +21,14 @@ import java.io.Serializable; import java.util.List; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * CreateMacroDesc. 
* */ -@Explain(displayName = "Create Macro") +@Explain(displayName = "Create Macro", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class CreateMacroDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -52,7 +53,7 @@ this.body = body; } - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getMacroName() { return macroName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java (working copy) @@ -40,12 +40,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.mapred.OutputFormat; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * CreateTableDesc. * */ -@Explain(displayName = "Create Table") +@Explain(displayName = "Create Table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class CreateTableDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; private static Log LOG = LogFactory.getLog(CreateTableDesc.class); @@ -140,12 +142,12 @@ return copy == null ? null : new ArrayList(copy); } - @Explain(displayName = "columns") + @Explain(displayName = "columns", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getColsString() { return Utilities.getFieldSchemaString(getCols()); } - @Explain(displayName = "partition columns") + @Explain(displayName = "partition columns", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getPartColsString() { return Utilities.getFieldSchemaString(getPartCols()); } @@ -159,7 +161,7 @@ this.ifNotExists = ifNotExists; } - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { return tableName; } @@ -188,7 +190,7 @@ this.partCols = partCols; } - @Explain(displayName = "bucket columns") + @Explain(displayName = "bucket columns", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getBucketCols() { return bucketCols; } @@ -197,7 +199,7 @@ this.bucketCols = bucketCols; } - @Explain(displayName = "# buckets") + @Explain(displayName = "# buckets", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Integer getNumBucketsExplain() { if (numBuckets == -1) { return null; @@ -268,7 +270,7 @@ this.comment = comment; } - @Explain(displayName = "input format") + @Explain(displayName = "input format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getInputFormat() { return inputFormat; } @@ -277,7 +279,7 @@ this.inputFormat = inputFormat; } - @Explain(displayName = "output format") + @Explain(displayName = "output format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getOutputFormat() { return outputFormat; } @@ -286,7 +288,7 @@ this.outputFormat = outputFormat; } - @Explain(displayName = "storage handler") + @Explain(displayName = "storage handler", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getStorageHandler() { return storageHandler; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableLikeDesc.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableLikeDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableLikeDesc.java (working copy) @@ -21,11 +21,13 @@ import java.io.Serializable; import java.util.Map; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * CreateTableLikeDesc. * */ -@Explain(displayName = "Create Table") +@Explain(displayName = "Create Table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class CreateTableLikeDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; String tableName; @@ -71,7 +73,7 @@ this.ifNotExists = ifNotExists; } - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { return tableName; } @@ -80,7 +82,7 @@ this.tableName = tableName; } - @Explain(displayName = "default input format") + @Explain(displayName = "default input format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getDefaultInputFormat() { return defaultInputFormat; } @@ -89,7 +91,7 @@ this.defaultInputFormat = inputFormat; } - @Explain(displayName = "default output format") + @Explain(displayName = "default output format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getDefaultOutputFormat() { return defaultOutputFormat; } @@ -148,7 +150,7 @@ this.defaultSerdeProps = serdeProps; } - @Explain(displayName = "like") + @Explain(displayName = "like", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getLikeTableName() { return likeTableName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java (working copy) @@ -24,12 +24,14 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * CreateViewDesc. * */ -@Explain(displayName = "Create View") +@Explain(displayName = "Create View", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class CreateViewDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -65,7 +67,7 @@ this.isAlterViewAs = isAlterViewAs; } - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getViewName() { return viewName; } @@ -74,7 +76,7 @@ this.viewName = viewName; } - @Explain(displayName = "original text") + @Explain(displayName = "original text", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getViewOriginalText() { return originalText; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (working copy) @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.parse.AlterTablePartMergeFilesDesc; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * DDLWork. 
@@ -562,7 +563,7 @@ /** * @return the createTblDesc */ - @Explain(displayName = "Create Table Operator") + @Explain(displayName = "Create Table Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public CreateTableDesc getCreateTblDesc() { return createTblDesc; } @@ -608,7 +609,7 @@ /** * @return the createTblDesc */ - @Explain(displayName = "Create Table Operator") + @Explain(displayName = "Create Table Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public CreateTableLikeDesc getCreateTblLikeDesc() { return createTblLikeDesc; } @@ -624,7 +625,7 @@ /** * @return the createTblDesc */ - @Explain(displayName = "Create View Operator") + @Explain(displayName = "Create View Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public CreateViewDesc getCreateViewDesc() { return createVwDesc; } @@ -640,7 +641,7 @@ /** * @return the dropTblDesc */ - @Explain(displayName = "Drop Table Operator") + @Explain(displayName = "Drop Table Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public DropTableDesc getDropTblDesc() { return dropTblDesc; } @@ -656,7 +657,7 @@ /** * @return the alterTblDesc */ - @Explain(displayName = "Alter Table Operator") + @Explain(displayName = "Alter Table Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public AlterTableDesc getAlterTblDesc() { return alterTblDesc; } @@ -672,7 +673,7 @@ /** * @return the showDatabasesDesc */ - @Explain(displayName = "Show Databases Operator") + @Explain(displayName = "Show Databases Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ShowDatabasesDesc getShowDatabasesDesc() { return showDatabasesDesc; } @@ -688,7 +689,7 @@ /** * @return the showTblsDesc */ - @Explain(displayName = "Show Table Operator") + @Explain(displayName = "Show Table Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ShowTablesDesc getShowTblsDesc() { return showTblsDesc; } @@ -704,7 +705,7 @@ /** * @return the showColumnsDesc */ - @Explain(displayName = "Show Columns Operator") + @Explain(displayName = "Show Columns Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ShowColumnsDesc getShowColumnsDesc() { return showColumnsDesc; } @@ -720,7 +721,7 @@ /** * @return the showFuncsDesc */ - @Explain(displayName = "Show Function Operator") + @Explain(displayName = "Show Function Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ShowFunctionsDesc getShowFuncsDesc() { return showFuncsDesc; } @@ -728,17 +729,17 @@ /** * @return the showLocksDesc */ - @Explain(displayName = "Show Lock Operator") + @Explain(displayName = "Show Lock Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ShowLocksDesc getShowLocksDesc() { return showLocksDesc; } - @Explain(displayName = "Show Compactions Operator") + @Explain(displayName = "Show Compactions Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ShowCompactionsDesc getShowCompactionsDesc() { return showCompactionsDesc; } - @Explain(displayName = "Show Transactions Operator") + @Explain(displayName = "Show Transactions Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ShowTxnsDesc getShowTxnsDesc() { return showTxnsDesc; } @@ -746,7 +747,7 @@ /** * @return the lockTblDesc */ - @Explain(displayName = "Lock Table Operator") + @Explain(displayName = "Lock Table Operator", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED }) public LockTableDesc getLockTblDesc() { return lockTblDesc; } @@ -754,7 +755,7 @@ /** * @return the unlockTblDesc */ - @Explain(displayName = "Unlock Table Operator") + @Explain(displayName = "Unlock Table Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public UnlockTableDesc getUnlockTblDesc() { return unlockTblDesc; } @@ -762,7 +763,7 @@ /** * @return the descFuncDesc */ - @Explain(displayName = "Show Function Operator") + @Explain(displayName = "Show Function Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public DescFunctionDesc getDescFunctionDesc() { return descFunctionDesc; } @@ -818,7 +819,7 @@ /** * @return the showPartsDesc */ - @Explain(displayName = "Show Partitions Operator") + @Explain(displayName = "Show Partitions Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ShowPartitionsDesc getShowPartsDesc() { return showPartsDesc; } @@ -834,7 +835,7 @@ /** * @return the showCreateTblDesc */ - @Explain(displayName = "Show Create Table Operator") + @Explain(displayName = "Show Create Table Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ShowCreateTableDesc getShowCreateTblDesc() { return showCreateTblDesc; } @@ -850,7 +851,7 @@ /** * @return the showIndexesDesc */ - @Explain(displayName = "Show Index Operator") + @Explain(displayName = "Show Index Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ShowIndexesDesc getShowIndexesDesc() { return showIndexesDesc; } @@ -862,7 +863,7 @@ /** * @return the descTblDesc */ - @Explain(displayName = "Describe Table Operator") + @Explain(displayName = "Describe Table Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public DescTableDesc getDescTblDesc() { return descTblDesc; } @@ -878,7 +879,7 @@ /** * @return information about the partitions we want to add. */ - @Explain(displayName = "Add Partition Operator") + @Explain(displayName = "Add Partition Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public AddPartitionDesc getAddPartitionDesc() { return addPartitionDesc; } @@ -1101,7 +1102,7 @@ this.alterTableAlterPartDesc = alterPartitionDesc; } - @Explain(displayName = "Truncate Table Operator") + @Explain(displayName = "Truncate Table Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public TruncateTableDesc getTruncateTblDesc() { return truncateTblDesc; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DemuxDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DemuxDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DemuxDesc.java (working copy) @@ -20,13 +20,15 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * Demux operator descriptor implementation. 
* */ -@Explain(displayName = "Demux Operator") +@Explain(displayName = "Demux Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class DemuxDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DependencyCollectionWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DependencyCollectionWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DependencyCollectionWork.java (working copy) @@ -19,12 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * DependencyCollectionWork * */ -@Explain(displayName = "Dependency Collection") +@Explain(displayName = "Dependency Collection", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class DependencyCollectionWork implements Serializable { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DescDatabaseDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DescDatabaseDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DescDatabaseDesc.java (working copy) @@ -21,12 +21,13 @@ import java.io.Serializable; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * DescDatabaseDesc. * */ -@Explain(displayName = "Describe Database") +@Explain(displayName = "Describe Database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class DescDatabaseDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -76,7 +77,7 @@ /** * @return the tableName */ - @Explain(displayName = "database") + @Explain(displayName = "database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getDatabaseName() { return dbName; } @@ -92,7 +93,7 @@ /** * @return the resFile */ - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFile() { return resFile; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DescFunctionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DescFunctionDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DescFunctionDesc.java (working copy) @@ -21,12 +21,13 @@ import java.io.Serializable; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * DescFunctionDesc. 
* */ -@Explain(displayName = "Describe Function") +@Explain(displayName = "Describe Function", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class DescFunctionDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; String name; @@ -82,7 +83,7 @@ /** * @return the name */ - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getName() { return name; } @@ -98,7 +99,7 @@ /** * @return the resFile */ - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFile() { return resFile; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java (working copy) @@ -22,12 +22,14 @@ import java.util.Map; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * DescTableDesc. * */ -@Explain(displayName = "Describe Table") +@Explain(displayName = "Describe Table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class DescTableDesc extends DDLDesc implements Serializable { public void setPartSpec(Map partSpec) { this.partSpec = partSpec; @@ -131,7 +133,7 @@ /** * @return the tableName */ - @Explain(displayName = "table") + @Explain(displayName = "table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { return tableName; } @@ -162,7 +164,7 @@ /** * @return the partSpec */ - @Explain(displayName = "partition") + @Explain(displayName = "partition", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map getPartSpec() { return partSpec; } @@ -178,7 +180,7 @@ /** * @return the resFile */ - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFile() { return resFile; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DropDatabaseDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DropDatabaseDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DropDatabaseDesc.java (working copy) @@ -19,12 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * DropDatabaseDesc. 
* */ -@Explain(displayName = "Drop Database") +@Explain(displayName = "Drop Database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class DropDatabaseDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -43,7 +45,7 @@ this.cascade = cascade; } - @Explain(displayName = "database") + @Explain(displayName = "database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getDatabaseName() { return databaseName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DropFunctionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DropFunctionDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DropFunctionDesc.java (working copy) @@ -19,12 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * DropFunctionDesc. * */ -@Explain(displayName = "Drop Function") +@Explain(displayName = "Drop Function", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class DropFunctionDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -42,7 +44,7 @@ this.isTemp = isTemp; } - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getFunctionName() { return functionName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DropMacroDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DropMacroDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DropMacroDesc.java (working copy) @@ -19,12 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * DropMacroDesc. * */ -@Explain(displayName = "Drop Macro") +@Explain(displayName = "Drop Macro", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class DropMacroDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -40,7 +42,7 @@ this.macroName = macroName; } - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getMacroName() { return macroName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DropTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DropTableDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DropTableDesc.java (working copy) @@ -22,12 +22,14 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * DropTableDesc. * TODO: this is currently used for both drop table and drop partitions. 
*/ -@Explain(displayName = "Drop Table") +@Explain(displayName = "Drop Table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class DropTableDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -89,7 +91,7 @@ /** * @return the tableName */ - @Explain(displayName = "table") + @Explain(displayName = "table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { return tableName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DummyStoreDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DummyStoreDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DummyStoreDesc.java (working copy) @@ -17,13 +17,15 @@ */ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * Dummy Store Desc. This is only used by sort-merge joins to store the * result for the small table (sub-query) being scanned. */ -@Explain(displayName = "Dummy Store") +@Explain(displayName = "Dummy Store", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class DummyStoreDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPruningEventDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPruningEventDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPruningEventDesc.java (working copy) @@ -22,9 +22,11 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + @SuppressWarnings("serial") -@Explain(displayName = "Dynamic Partitioning Event Operator") +@Explain(displayName = "Dynamic Partitioning Event Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class DynamicPruningEventDesc extends AppMasterEventDesc { // column in the target table that will be pruned against Index: ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java (working copy) @@ -27,9 +27,20 @@ */ @Retention(RetentionPolicy.RUNTIME) public @interface Explain { + public enum Level { + USER, DEFAULT, EXTENDED; + public boolean in(Level[] levels) { + for (Level level : levels) { + if (level.equals(this)) { + return true; + } + } + return false; + } + }; String displayName() default ""; - boolean normalExplain() default true; + Level[] explainLevels() default { Level.DEFAULT, Level.EXTENDED }; boolean displayOnlyOnTrue() default false; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java (working copy) @@ -51,6 +51,8 @@ boolean appendTaskType; boolean authorize; + boolean userLevelExplain; + String cboInfo; private transient BaseSemanticAnalyzer analyzer; @@ -67,7 +69,9 @@ boolean formatted, boolean dependency, boolean logical, - boolean authorize) { + boolean authorize, + boolean userLevelExplain, + String cboInfo) { this.resFile = resFile; 
this.rootTasks = new ArrayList>(rootTasks); this.fetchTask = fetchTask; @@ -80,6 +84,8 @@ this.logical = logical; this.pCtx = pCtx; this.authorize = authorize; + this.userLevelExplain = userLevelExplain; + this.cboInfo = cboInfo; } public Path getResFile() { @@ -181,4 +187,21 @@ public BaseSemanticAnalyzer getAnalyzer() { return analyzer; } + + public boolean isUserLevelExplain() { + return userLevelExplain; + } + + public void setUserLevelExplain(boolean userLevelExplain) { + this.userLevelExplain = userLevelExplain; + } + + public String getCboInfo() { + return cboInfo; + } + + public void setCboInfo(String cboInfo) { + this.cboInfo = cboInfo; + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ExplosionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplosionDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplosionDesc.java (working copy) @@ -19,12 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ExplosionDesc. * */ -@Explain(displayName = "Explosion") +@Explain(displayName = "Explosion", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ExplosionDesc implements Serializable { private static final long serialVersionUID = 1L; private String fieldName; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java (working copy) @@ -29,13 +29,14 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.parse.SplitSample; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** * FetchWork. 
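The Explain.java hunk above introduces the Level enum with its in() helper and replaces normalExplain with the explainLevels attribute, while ExplainWork now carries a userLevelExplain flag and a cboInfo string. The sketch below is not code from this patch; the class and method names are illustrative, and only the getters shown in the hunks above are assumed to exist. It shows how a renderer could combine the two pieces.

import java.lang.reflect.Method;
import org.apache.hadoop.hive.ql.plan.Explain;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.hive.ql.plan.ExplainWork;

final class ExplainLevelSketch {
  // Illustrative only: map the new ExplainWork flag to an explain level.
  // Choosing EXTENDED is handled by existing code that this hunk does not touch.
  static Level requestedLevel(ExplainWork work) {
    return work.isUserLevelExplain() ? Level.USER : Level.DEFAULT;
  }

  // Illustrative only: decide whether an @Explain-annotated getter is printed at a level.
  // Getters left with the default explainLevels() keep {DEFAULT, EXTENDED}, so they stay
  // hidden from the new user-level output unless a hunk adds Level.USER explicitly.
  static boolean shouldRender(Method getter, Level requested) {
    Explain note = getter.getAnnotation(Explain.class);
    return note != null && requested.in(note.explainLevels());
  }
}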
* */ -@Explain(displayName = "Fetch Operator") +@Explain(displayName = "Fetch Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class FetchWork implements Serializable { private static final long serialVersionUID = 1L; @@ -185,7 +186,7 @@ * * @return the partDesc array list */ - @Explain(displayName = "Partition Description", normalExplain = false) + @Explain(displayName = "Partition Description", explainLevels = { Level.EXTENDED }) public ArrayList getPartDescOrderedByPartDir() { ArrayList partDescOrdered = partDesc; @@ -232,7 +233,7 @@ /** * @return the limit */ - @Explain(displayName = "limit") + @Explain(displayName = "limit", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public int getLimit() { return limit; } @@ -253,7 +254,7 @@ this.leastNumRows = leastNumRows; } - @Explain(displayName = "Processor Tree") + @Explain(displayName = "Processor Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Operator getSource() { return source; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java (working copy) @@ -24,12 +24,14 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * FileSinkDesc. * */ -@Explain(displayName = "File Output Operator") +@Explain(displayName = "File Output Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class FileSinkDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; @@ -151,7 +153,7 @@ return (Object) ret; } - @Explain(displayName = "directory", normalExplain = false) + @Explain(displayName = "directory", explainLevels = { Level.EXTENDED }) public Path getDirName() { return dirName; } @@ -164,7 +166,7 @@ return linkedFileSink ? 
parentDir : dirName; } - @Explain(displayName = "table") + @Explain(displayName = "table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public TableDesc getTableInfo() { return tableInfo; } @@ -173,7 +175,7 @@ this.tableInfo = tableInfo; } - @Explain(displayName = "compressed") + @Explain(displayName = "compressed", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public boolean getCompressed() { return compressed; } @@ -182,7 +184,7 @@ this.compressed = compressed; } - @Explain(displayName = "GlobalTableId", normalExplain = false) + @Explain(displayName = "GlobalTableId", explainLevels = { Level.EXTENDED }) public int getDestTableId() { return destTableId; } @@ -210,7 +212,7 @@ /** * @return the multiFileSpray */ - @Explain(displayName = "MultiFileSpray", normalExplain = false) + @Explain(displayName = "MultiFileSpray", explainLevels = { Level.EXTENDED }) public boolean isMultiFileSpray() { return multiFileSpray; } @@ -248,7 +250,7 @@ /** * @return the totalFiles */ - @Explain(displayName = "TotalFiles", normalExplain = false) + @Explain(displayName = "TotalFiles", explainLevels = { Level.EXTENDED }) public int getTotalFiles() { return totalFiles; } @@ -277,7 +279,7 @@ /** * @return the numFiles */ - @Explain(displayName = "NumFilesPerFileSink", normalExplain = false) + @Explain(displayName = "NumFilesPerFileSink", explainLevels = { Level.EXTENDED }) public int getNumFiles() { return numFiles; } @@ -301,7 +303,7 @@ this.staticSpec = staticSpec; } - @Explain(displayName = "Static Partition Specification", normalExplain = false) + @Explain(displayName = "Static Partition Specification", explainLevels = { Level.EXTENDED }) public String getStaticSpec() { return staticSpec; } @@ -310,7 +312,7 @@ this.gatherStats = gatherStats; } - @Explain(displayName = "GatherStats", normalExplain = false) + @Explain(displayName = "GatherStats", explainLevels = { Level.EXTENDED }) public boolean isGatherStats() { return gatherStats; } @@ -326,7 +328,7 @@ * will be aggregated. * @return key prefix used for stats publishing and aggregation. */ - @Explain(displayName = "Stats Publishing Key Prefix", normalExplain = false) + @Explain(displayName = "Stats Publishing Key Prefix", explainLevels = { Level.EXTENDED }) public String getStatsAggPrefix() { // dirName uniquely identifies destination directory of a FileSinkOperator. // If more than one FileSinkOperator write to the same partition, this dirName Index: ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java (working copy) @@ -19,13 +19,15 @@ package org.apache.hadoop.hive.ql.plan; import java.util.List; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * FilterDesc. 
* */ -@Explain(displayName = "Filter Operator") +@Explain(displayName = "Filter Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class FilterDesc extends AbstractOperatorDesc { /** @@ -100,7 +102,7 @@ this.sampleDescr = sampleDescr; } - @Explain(displayName = "predicate") + @Explain(displayName = "predicate", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getPredicateString() { StringBuffer sb = new StringBuffer(); PlanUtils.addExprToStringBuffer(predicate, sb); @@ -116,7 +118,7 @@ this.predicate = predicate; } - @Explain(displayName = "isSamplingPred", normalExplain = false) + @Explain(displayName = "isSamplingPred", explainLevels = { Level.EXTENDED }) public boolean getIsSamplingPred() { return isSamplingPred; } @@ -133,7 +135,7 @@ this.sampleDescr = sampleDescr; } - @Explain(displayName = "sampleDesc", normalExplain = false) + @Explain(displayName = "sampleDesc", explainLevels = { Level.EXTENDED }) public String getSampleDescExpr() { return sampleDescr == null ? null : sampleDescr.toString(); } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ForwardDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ForwardDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ForwardDesc.java (working copy) @@ -17,13 +17,15 @@ */ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ForwardDesc. * */ -@Explain(displayName = "Forward") +@Explain(displayName = "Forward", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ForwardDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/GrantDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/GrantDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GrantDesc.java (working copy) @@ -22,8 +22,10 @@ import java.util.List; import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName = "Grant") + +@Explain(displayName = "Grant", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class GrantDesc extends DDLDesc implements Serializable, Cloneable { private static final long serialVersionUID = 1L; @@ -55,7 +57,7 @@ /** * @return privileges */ - @Explain(displayName = "Privileges") + @Explain(displayName = "Privileges", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getPrivileges() { return privileges; } @@ -70,7 +72,7 @@ /** * @return principals */ - @Explain(displayName = "Principals") + @Explain(displayName = "Principals", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getPrincipals() { return principals; } @@ -85,7 +87,7 @@ /** * @return grant option */ - @Explain(displayName = "grant option") + @Explain(displayName = "grant option", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public boolean isGrantOption() { return grantOption; } @@ -100,7 +102,7 @@ /** * @return privilege subject */ - @Explain(displayName="privilege subject") + @Explain(displayName="privilege subject", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public PrivilegeObjectDesc getPrivilegeSubjectDesc() { return privilegeSubjectDesc; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/GrantRevokeRoleDDL.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/GrantRevokeRoleDDL.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GrantRevokeRoleDDL.java (working copy) @@ -21,8 +21,10 @@ import java.util.List; import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName="grant or revoke roles") + +@Explain(displayName="grant or revoke roles", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class GrantRevokeRoleDDL { private boolean grant; @@ -55,7 +57,7 @@ /** * @return grant or revoke privileges */ - @Explain(displayName="grant (or revoke)") + @Explain(displayName="grant (or revoke)", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public boolean getGrant() { return grant; } @@ -67,7 +69,7 @@ /** * @return a list of principals */ - @Explain(displayName="principals") + @Explain(displayName="principals", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getPrincipalDesc() { return principalDesc; } @@ -79,7 +81,7 @@ /** * @return a list of roles */ - @Explain(displayName="roles") + @Explain(displayName="roles", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getRoles() { return roles; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java (working copy) @@ -25,12 +25,14 @@ import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hive.common.util.AnnotationUtils; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * GroupByDesc. * */ -@Explain(displayName = "Group By Operator") +@Explain(displayName = "Group By Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class GroupByDesc extends AbstractOperatorDesc { /** * Group-by Mode: COMPLETE: complete 1-phase aggregation: iterate, terminate @@ -156,7 +158,7 @@ this.mode = mode; } - @Explain(displayName = "keys") + @Explain(displayName = "keys", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getKeyString() { return PlanUtils.getExprListString(keys); } @@ -169,7 +171,7 @@ this.keys = keys; } - @Explain(displayName = "outputColumnNames") + @Explain(displayName = "outputColumnNames", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ArrayList getOutputColumnNames() { return outputColumnNames; } @@ -201,7 +203,7 @@ this.memoryThreshold = memoryThreshold; } - @Explain(displayName = "aggregations") + @Explain(displayName = "aggregations", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getAggregatorStrings() { List res = new ArrayList(); for (AggregationDesc agg: aggregators) { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableDummyDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableDummyDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableDummyDesc.java (working copy) @@ -17,12 +17,14 @@ */ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * HashTable Dummy Descriptor implementation. 
* */ -@Explain(displayName = "HashTable Dummy Operator") +@Explain(displayName = "HashTable Dummy Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class HashTableDummyDesc extends AbstractOperatorDesc { private TableDesc tbl; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java (working copy) @@ -25,12 +25,14 @@ import java.util.Map; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * Map Join operator Descriptor implementation. * */ -@Explain(displayName = "HashTable Sink Operator") +@Explain(displayName = "HashTable Sink Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class HashTableSinkDesc extends JoinDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -285,7 +287,7 @@ } @Override - @Explain(displayName = "filter mappings", normalExplain = false) + @Explain(displayName = "filter mappings", explainLevels = { Level.EXTENDED }) public Map getFilterMapString() { return toCompactString(filterMap); } @@ -301,7 +303,7 @@ /** * @return the keys in string form */ - @Explain(displayName = "keys") + @Explain(displayName = "keys", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map getKeysString() { Map keyMap = new LinkedHashMap(); for (Map.Entry> k: getKeys().entrySet()) { @@ -328,7 +330,7 @@ /** * @return the position of the big table not in memory */ - @Explain(displayName = "Position of Big Table", normalExplain = false) + @Explain(displayName = "Position of Big Table", explainLevels = { Level.EXTENDED }) public int getPosBigTable() { return posBigTable; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java (working copy) @@ -19,7 +19,9 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * Join conditions Descriptor implementation. * @@ -107,7 +109,7 @@ this.type = type; } - @Explain + @Explain(explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getJoinCondString() { StringBuilder sb = new StringBuilder(); Index: ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java (working copy) @@ -29,12 +29,14 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.parse.QBJoinTree; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * Join operator Descriptor implementation. 
* */ -@Explain(displayName = "Join Operator") +@Explain(displayName = "Join Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class JoinDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; public static final int INNER_JOIN = 0; @@ -209,7 +211,7 @@ /** * @return the keys in string form */ - @Explain(displayName = "keys") + @Explain(displayName = "keys", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map getKeysString() { Map keyMap = new LinkedHashMap(); for (byte i = 0; i < joinKeys.length; i++) { @@ -229,7 +231,7 @@ * * @return Map from alias to filters on the alias. */ - @Explain(displayName = "filter predicates") + @Explain(displayName = "filter predicates", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map getFiltersStringMap() { if (getFilters() == null || getFilters().size() == 0) { return null; @@ -275,7 +277,7 @@ this.filters = filters; } - @Explain(displayName = "outputColumnNames") + @Explain(displayName = "outputColumnNames", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getOutputColumnNames() { return outputColumnNames; } @@ -293,7 +295,7 @@ this.noOuterJoin = noOuterJoin; } - @Explain(displayName = "condition map") + @Explain(displayName = "condition map", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getCondsList() { if (conds == null) { return null; @@ -453,7 +455,7 @@ this.filterMap = filterMap; } - @Explain(displayName = "filter mappings", normalExplain = false) + @Explain(displayName = "filter mappings", explainLevels = { Level.EXTENDED }) public Map getFilterMapString() { return toCompactString(filterMap); } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewForwardDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewForwardDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewForwardDesc.java (working copy) @@ -17,13 +17,15 @@ */ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * LateralViewForwardDesc. * */ -@Explain(displayName = "Lateral View Forward") +@Explain(displayName = "Lateral View Forward", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class LateralViewForwardDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java (working copy) @@ -19,13 +19,15 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * LateralViewJoinDesc. 
* */ -@Explain(displayName = "Lateral View Join Operator") +@Explain(displayName = "Lateral View Join Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class LateralViewJoinDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; @@ -44,7 +46,7 @@ this.outputInternalColNames = outputInternalColNames; } - @Explain(displayName = "outputColumnNames") + @Explain(displayName = "outputColumnNames", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ArrayList getOutputInternalColNames() { return outputInternalColNames; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java (working copy) @@ -17,13 +17,14 @@ */ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * LimitDesc. * */ -@Explain(displayName = "Limit") +@Explain(displayName = "Limit", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class LimitDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; private int limit; @@ -36,7 +37,7 @@ this.limit = limit; } - @Explain(displayName = "Number of rows") + @Explain(displayName = "Number of rows", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public int getLimit() { return limit; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/LoadDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/LoadDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LoadDesc.java (working copy) @@ -21,6 +21,7 @@ import java.io.Serializable; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * LoadDesc. @@ -37,7 +38,7 @@ this.sourcePath = sourcePath; } - @Explain(displayName = "source", normalExplain = false) + @Explain(displayName = "source", explainLevels = { Level.EXTENDED }) public Path getSourcePath() { return sourcePath; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java (working copy) @@ -24,6 +24,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * LoadTableDesc. 
@@ -125,7 +126,7 @@ return holdDDLTime; } - @Explain(displayName = "table") + @Explain(displayName = "table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public TableDesc getTable() { return table; } @@ -134,7 +135,7 @@ this.table = table; } - @Explain(displayName = "partition") + @Explain(displayName = "partition", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map getPartitionSpec() { return partitionSpec; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/LockDatabaseDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/LockDatabaseDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LockDatabaseDesc.java (working copy) @@ -19,12 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * LockDatabaseDesc. * */ -@Explain(displayName = "Lock Database") +@Explain(displayName = "Lock Database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class LockDatabaseDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -42,7 +44,7 @@ this.queryId = queryId; } - @Explain(displayName = "database") + @Explain(displayName = "database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getDatabaseName() { return databaseName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/LockTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/LockTableDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LockTableDesc.java (working copy) @@ -22,12 +22,14 @@ import java.util.Map; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * LockTableDesc. * */ -@Explain(displayName = "Lock Table") +@Explain(displayName = "Lock Table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class LockTableDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (working copy) @@ -27,12 +27,13 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * Map Join operator Descriptor implementation. 
* */ -@Explain(displayName = "Map Join Operator") +@Explain(displayName = "Map Join Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class MapJoinDesc extends JoinDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -121,7 +122,7 @@ } } - @Explain(displayName = "input vertices") + @Explain(displayName = "input vertices", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map getParentToInput() { return parentToInput; } @@ -138,7 +139,7 @@ return parentDataSizes; } - @Explain(displayName = "Estimated key counts", normalExplain = false) + @Explain(displayName = "Estimated key counts", explainLevels = { Level.EXTENDED }) public String getKeyCountsExplainDesc() { StringBuilder result = null; for (Map.Entry entry : parentKeyCounts.entrySet()) { @@ -195,7 +196,7 @@ * @return the keys in string form */ @Override - @Explain(displayName = "keys") + @Explain(displayName = "keys", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map getKeysString() { Map keyMap = new LinkedHashMap(); for (Map.Entry> k: getKeys().entrySet()) { @@ -222,7 +223,7 @@ /** * @return the position of the big table not in memory */ - @Explain(displayName = "Position of Big Table", normalExplain = false) + @Explain(displayName = "Position of Big Table", explainLevels = { Level.EXTENDED }) public int getPosBigTable() { return posBigTable; } @@ -312,7 +313,7 @@ this.bigTablePartSpecToFileMapping = partToFileMapping; } - @Explain(displayName = "BucketMapJoin", normalExplain = false, displayOnlyOnTrue = true) + @Explain(displayName = "BucketMapJoin", explainLevels = { Level.EXTENDED }, displayOnlyOnTrue = true) public boolean isBucketMapJoin() { return isBucketMapJoin; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (working copy) @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; import org.apache.hadoop.hive.ql.parse.SplitSample; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.mapred.JobConf; import com.google.common.collect.Interner; @@ -134,7 +135,7 @@ super(name); } - @Explain(displayName = "Path -> Alias", normalExplain = false) + @Explain(displayName = "Path -> Alias", explainLevels = { Level.EXTENDED }) public LinkedHashMap> getPathToAliases() { return pathToAliases; } @@ -155,7 +156,7 @@ * * @return */ - @Explain(displayName = "Truncated Path -> Alias", normalExplain = false) + @Explain(displayName = "Truncated Path -> Alias", explainLevels = { Level.EXTENDED }) public Map> getTruncatedPathToAliases() { Map> trunPathToAliases = new LinkedHashMap>(); @@ -170,7 +171,7 @@ return trunPathToAliases; } - @Explain(displayName = "Path -> Partition", normalExplain = false) + @Explain(displayName = "Path -> Partition", explainLevels = { Level.EXTENDED }) public LinkedHashMap getPathToPartitionInfo() { return pathToPartitionInfo; } @@ -240,7 +241,7 @@ this.aliasToWork = aliasToWork; } - @Explain(displayName = "Split Sample", normalExplain = false) + @Explain(displayName = "Split Sample", explainLevels = { Level.EXTENDED }) public HashMap getNameToSplitSample() { return nameToSplitSample; } @@ -329,7 +330,7 @@ } @Override - @Explain(displayName = "Map 
Operator Tree") + @Explain(displayName = "Map Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Set> getAllRootOperators() { Set> opSet = new LinkedHashSet>(); @@ -467,12 +468,12 @@ mapWork.useBucketizedHiveInputFormat |= useBucketizedHiveInputFormat; } - @Explain(displayName = "Path -> Bucketed Columns", normalExplain = false) + @Explain(displayName = "Path -> Bucketed Columns", explainLevels = { Level.EXTENDED }) public Map> getBucketedColsByDirectory() { return bucketedColsByDirectory; } - @Explain(displayName = "Path -> Sorted Columns", normalExplain = false) + @Explain(displayName = "Path -> Sorted Columns", explainLevels = { Level.EXTENDED }) public Map> getSortedColsByDirectory() { return sortedColsByDirectory; } @@ -493,7 +494,7 @@ this.samplingType = samplingType; } - @Explain(displayName = "Sampling", normalExplain = false) + @Explain(displayName = "Sampling", explainLevels = { Level.EXTENDED }) public String getSamplingTypeString() { return samplingType == 1 ? "SAMPLING_ON_PREV_MR" : samplingType == 2 ? "SAMPLING_ON_START" : null; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (working copy) @@ -31,12 +31,14 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * MapredLocalWork. * */ -@Explain(displayName = "Map Reduce Local Work") +@Explain(displayName = "Map Reduce Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class MapredLocalWork implements Serializable { private static final long serialVersionUID = 1L; @@ -83,7 +85,7 @@ } - @Explain(displayName = "Alias -> Map Local Operator Tree") + @Explain(displayName = "Alias -> Map Local Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public LinkedHashMap> getAliasToWork() { return aliasToWork; } @@ -104,7 +106,7 @@ /** * @return the aliasToFetchWork */ - @Explain(displayName = "Alias -> Map Local Tables") + @Explain(displayName = "Alias -> Map Local Tables", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public LinkedHashMap getAliasToFetchWork() { return aliasToFetchWork; } @@ -138,7 +140,7 @@ } } - @Explain(displayName = "Bucket Mapjoin Context", normalExplain = false) + @Explain(displayName = "Bucket Mapjoin Context", explainLevels = { Level.EXTENDED }) public BucketMapJoinContext getBucketMapjoinContextExplain() { return bucketMapjoinContext != null && bucketMapjoinContext.getBucketFileNameMapping() != null ? bucketMapjoinContext : null; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (working copy) @@ -23,13 +23,15 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * MapredWork. 
* */ -@Explain(displayName = "Map Reduce") +@Explain(displayName = "Map Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class MapredWork extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; @@ -38,7 +40,7 @@ private boolean finalMapRed; - @Explain(skipHeader = true, displayName = "Map") + @Explain(skipHeader = true, displayName = "Map", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public MapWork getMapWork() { return mapWork; } @@ -47,7 +49,7 @@ this.mapWork = mapWork; } - @Explain(skipHeader = true, displayName = "Reduce") + @Explain(skipHeader = true, displayName = "Reduce", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public ReduceWork getReduceWork() { return reduceWork; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java (working copy) @@ -21,11 +21,14 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.mapred.JobConf; public class MergeJoinWork extends BaseWork { @@ -65,7 +68,8 @@ this.mergeJoinOp = mergeJoinOp; } - public void addMergedWork(BaseWork work, BaseWork connectWork) { + public void addMergedWork(BaseWork work, BaseWork connectWork, + Map, BaseWork> leafOperatorToFollowingWork) { if (work != null) { if ((bigTableWork != null) && (bigTableWork != work)) { assert false; @@ -76,10 +80,43 @@ if (connectWork != null) { this.mergeWorkList.add(connectWork); + if ((connectWork instanceof ReduceWork) && (bigTableWork != null)) { + /* + * For tez to route data from an up-stream vertex correctly to the following vertex, the + * output name in the reduce sink needs to be setup appropriately. In the case of reduce + * side merge work, we need to ensure that the parent work that provides data to this merge + * work is setup to point to the right vertex name - the main work name. + * + * In this case, if the big table work has already been created, we can hook up the merge + * work items for the small table correctly. + */ + setReduceSinkOutputName(connectWork, leafOperatorToFollowingWork, bigTableWork.getName()); + } } + + if (work != null) { + /* + * Same reason as above. This is the case when we have the main work item after the merge work + * has been created for the small table side. 
+ */ + for (BaseWork mergeWork : mergeWorkList) { + if (mergeWork instanceof ReduceWork) { + setReduceSinkOutputName(mergeWork, leafOperatorToFollowingWork, work.getName()); + } + } + } } - @Explain(skipHeader = true, displayName = "Join") + private void setReduceSinkOutputName(BaseWork mergeWork, + Map, BaseWork> leafOperatorToFollowingWork, String name) { + for (Entry, BaseWork> entry : leafOperatorToFollowingWork.entrySet()) { + if (entry.getValue() == mergeWork) { + ((ReduceSinkOperator) entry.getKey()).getConf().setOutputName(name); + } + } + } + + @Explain(skipHeader=true, displayName = "Join", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getBaseWorkList() { return mergeWorkList; } @@ -88,7 +125,7 @@ return ((MapWork) bigTableWork).getAliasToWork().keySet().iterator().next(); } - @Explain(skipHeader = true, displayName = "Main") + @Explain(skipHeader=true, displayName = "Main", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public BaseWork getMainWork() { return bigTableWork; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java (working copy) @@ -25,12 +25,14 @@ import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * MoveWork. * */ -@Explain(displayName = "Move Operator") +@Explain(displayName = "Move Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class MoveWork implements Serializable { private static final long serialVersionUID = 1L; private LoadTableDesc loadTableWork; @@ -81,7 +83,7 @@ this.checkFileFormat = checkFileFormat; } - @Explain(displayName = "tables") + @Explain(displayName = "tables", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public LoadTableDesc getLoadTableWork() { return loadTableWork; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MuxDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MuxDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MuxDesc.java (working copy) @@ -24,13 +24,15 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * Mux operator descriptor implementation.. 
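The addMergedWork signature change above threads the planner's leafOperatorToFollowingWork map into MergeJoinWork so that, once the big table work exists, any ReduceSinkOperator whose following work is a reduce-side merge work gets its output name repointed at the main vertex; that is what setReduceSinkOutputName iterates for. A heavily reduced standalone sketch of the same rewiring, using hypothetical Work and ReduceSink stand-ins for BaseWork and ReduceSinkOperator, is shown here:

import java.util.HashMap;
import java.util.Map;

// Hypothetical, heavily reduced stand-ins for the Hive planner types involved.
class Work {                       // plays the role of BaseWork / ReduceWork
  final String name;
  Work(String name) { this.name = name; }
}

class ReduceSink {                 // plays the role of ReduceSinkOperator + its desc
  String outputName;               // which downstream vertex this sink feeds
}

public class MergeJoinRewireSketch {

  // Same idea as MergeJoinWork.setReduceSinkOutputName: every leaf reduce sink
  // whose following work is the merged (small table) work must instead emit to
  // the vertex that hosts the merge join, i.e. the big table work's name.
  static void setReduceSinkOutputName(Work mergeWork,
      Map<ReduceSink, Work> leafOperatorToFollowingWork, String mainWorkName) {
    for (Map.Entry<ReduceSink, Work> e : leafOperatorToFollowingWork.entrySet()) {
      if (e.getValue() == mergeWork) {
        e.getKey().outputName = mainWorkName;
      }
    }
  }

  public static void main(String[] args) {
    Work bigTableWork = new Work("Map 1");
    Work smallTableMergeWork = new Work("Reducer 3");

    ReduceSink rsToMerge = new ReduceSink();   // parent feeding the merged work
    ReduceSink rsElsewhere = new ReduceSink(); // parent feeding some other vertex

    Map<ReduceSink, Work> leafToFollowing = new HashMap<>();
    leafToFollowing.put(rsToMerge, smallTableMergeWork);
    leafToFollowing.put(rsElsewhere, new Work("Reducer 4"));

    setReduceSinkOutputName(smallTableMergeWork, leafToFollowing, bigTableWork.name);

    System.out.println(rsToMerge.outputName);    // Map 1
    System.out.println(rsElsewhere.outputName);  // null (untouched)
  }
}

The identity comparison (entry.getValue() == mergeWork) matters: only the sinks that actually feed the merged small-table work are rewired, while sinks feeding other vertices keep their original output names.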
* */ -@Explain(displayName = "Mux Operator") +@Explain(displayName = "Mux Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class MuxDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java (working copy) @@ -25,14 +25,11 @@ List> bucketColNames; List> sortColNames; int numBuckets; - int numReduceSinks; - public OpTraits(List> bucketColNames, int numBuckets, - List> sortColNames, int numReduceSinks) { + public OpTraits(List> bucketColNames, int numBuckets, List> sortColNames) { this.bucketColNames = bucketColNames; this.numBuckets = numBuckets; this.sortColNames = sortColNames; - this.numReduceSinks = numReduceSinks; } public List> getBucketColNames() { @@ -58,12 +55,4 @@ public List> getSortCols() { return sortColNames; } - - public void setNumReduceSinks(int numReduceSinks) { - this.numReduceSinks = numReduceSinks; - } - - public int getNumReduceSinks() { - return this.numReduceSinks; - } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/OrcFileMergeDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/OrcFileMergeDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OrcFileMergeDesc.java (working copy) @@ -18,11 +18,13 @@ package org.apache.hadoop.hive.ql.plan; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ORC fast file merge operator descriptor. */ -@Explain(displayName = "ORC File Merge Operator") +@Explain(displayName = "ORC File Merge Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class OrcFileMergeDesc extends FileMergeDesc { public OrcFileMergeDesc() { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java (working copy) @@ -32,8 +32,10 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName = "PTF Operator") + +@Explain(displayName = "PTF Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class PTFDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; @SuppressWarnings("unused") @@ -65,7 +67,7 @@ return funcDef == null ? 
null : funcDef.getStartOfChain(); } - @Explain(displayName = "Function definitions") + @Explain(displayName = "Function definitions", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getFuncDefExplain() { if (funcDef == null) { return null; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java (working copy) @@ -42,12 +42,14 @@ import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hive.common.util.HiveStringUtils; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * PartitionDesc. * */ -@Explain(displayName = "Partition") +@Explain(displayName = "Partition", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class PartitionDesc implements Serializable, Cloneable { static { @@ -94,7 +96,7 @@ setOutputFileFormatClass(part.getOutputFormatClass()); } - @Explain(displayName = "") + @Explain(displayName = "", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public TableDesc getTableDesc() { return tableDesc; } @@ -103,7 +105,7 @@ this.tableDesc = tableDesc; } - @Explain(displayName = "partition values") + @Explain(displayName = "partition values", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public LinkedHashMap getPartSpec() { return partSpec; } @@ -176,7 +178,7 @@ return properties; } - @Explain(displayName = "properties", normalExplain = false) + @Explain(displayName = "properties", explainLevels = { Level.EXTENDED }) public Map getPropertiesExplain() { return HiveStringUtils.getPropertiesExplain(getProperties()); } @@ -196,27 +198,27 @@ /** * @return the serdeClassName */ - @Explain(displayName = "serde") + @Explain(displayName = "serde", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getSerdeClassName() { return getProperties().getProperty(serdeConstants.SERIALIZATION_LIB); } - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { return getProperties().getProperty(hive_metastoreConstants.META_TABLE_NAME); } - @Explain(displayName = "input format") + @Explain(displayName = "input format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getInputFileFormatClassName() { return getInputFileFormatClass().getName(); } - @Explain(displayName = "output format") + @Explain(displayName = "output format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getOutputFileFormatClassName() { return getOutputFileFormatClass().getName(); } - @Explain(displayName = "base file name", normalExplain = false) + @Explain(displayName = "base file name", explainLevels = { Level.EXTENDED }) public String getBaseFileName() { return baseFileName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/PrincipalDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/PrincipalDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PrincipalDesc.java (working copy) @@ -21,8 +21,10 @@ import java.io.Serializable; import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName = "Principal") + +@Explain(displayName 
= "Principal", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class PrincipalDesc implements Serializable, Cloneable { private static final long serialVersionUID = 1L; @@ -41,7 +43,7 @@ super(); } - @Explain(displayName="name") + @Explain(displayName="name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getName() { return name; } @@ -50,7 +52,7 @@ this.name = name; } - @Explain(displayName="type", normalExplain = false) + @Explain(displayName="type", explainLevels = { Level.EXTENDED }) public PrincipalType getType() { return type; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/PrivilegeDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/PrivilegeDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PrivilegeDesc.java (working copy) @@ -22,8 +22,10 @@ import java.util.List; import org.apache.hadoop.hive.ql.security.authorization.Privilege; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName = "Privilege") + +@Explain(displayName = "Privilege", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class PrivilegeDesc implements Serializable, Cloneable { private static final long serialVersionUID = 1L; @@ -44,7 +46,7 @@ /** * @return privilege definition */ - @Explain(displayName = "privilege") + @Explain(displayName = "privilege", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Privilege getPrivilege() { return privilege; } @@ -59,7 +61,7 @@ /** * @return columns on which the given privilege take affect. */ - @Explain(displayName = "columns") + @Explain(displayName = "columns", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getColumns() { return columns; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/PrivilegeObjectDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/PrivilegeObjectDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PrivilegeObjectDesc.java (working copy) @@ -20,8 +20,10 @@ import java.util.HashMap; import java.util.List; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName="privilege subject") + +@Explain(displayName="privilege subject", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class PrivilegeObjectDesc { //default type is table @@ -53,7 +55,7 @@ this.table = isTable; } - @Explain(displayName="object") + @Explain(displayName="object", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getObject() { return object; } @@ -62,7 +64,7 @@ this.object = object; } - @Explain(displayName="partition spec") + @Explain(displayName="partition spec", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public HashMap getPartSpec() { return partSpec; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/RCFileMergeDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/RCFileMergeDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/RCFileMergeDesc.java (working copy) @@ -18,11 +18,13 @@ package org.apache.hadoop.hive.ql.plan; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * Descriptor for Fast file merge RC file operator. 
*/ -@Explain(displayName = "RCFile Merge Operator") +@Explain(displayName = "RCFile Merge Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class RCFileMergeDesc extends FileMergeDesc { public RCFileMergeDesc() { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java (working copy) @@ -21,17 +21,20 @@ import java.util.ArrayList; import java.util.EnumSet; import java.util.List; +import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ReduceSinkDesc. * */ -@Explain(displayName = "Reduce Output Operator") +@Explain(displayName = "Reduce Output Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ReduceSinkDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; /** @@ -194,7 +197,7 @@ this.outputValueColumnNames = outputValueColumnNames; } - @Explain(displayName = "key expressions") + @Explain(displayName = "key expressions", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getKeyColString() { return PlanUtils.getExprListString(keyCols); } @@ -215,7 +218,7 @@ this.numDistributionKeys = numKeys; } - @Explain(displayName = "value expressions") + @Explain(displayName = "value expressions", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getValueColsString() { return PlanUtils.getExprListString(valueCols); } @@ -228,7 +231,7 @@ this.valueCols = valueCols; } - @Explain(displayName = "Map-reduce partition columns") + @Explain(displayName = "Map-reduce partition columns", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getParitionColsString() { return PlanUtils.getExprListString(partitionCols); } @@ -249,7 +252,7 @@ return false; } - @Explain(displayName = "tag", normalExplain = false) + @Explain(displayName = "tag", explainLevels = { Level.EXTENDED }) public int getTag() { return tag; } @@ -266,7 +269,7 @@ this.topN = topN; } - @Explain(displayName = "TopN", normalExplain = false) + @Explain(displayName = "TopN", explainLevels = { Level.EXTENDED }) public Integer getTopNExplain() { return topN > 0 ? topN : null; } @@ -337,7 +340,7 @@ * of the same length as key columns, that consists of only "+" * (ascending order) and "-" (descending order). 
*/ - @Explain(displayName = "sort order") + @Explain(displayName = "sort order", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getOrder() { return keySerializeInfo.getProperties().getProperty( org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_SORT_ORDER); @@ -365,6 +368,7 @@ this.distinctColumnIndices = distinctColumnIndices; } + @Explain(displayName = "outputname", explainLevels = { Level.USER }) public String getOutputName() { return outputName; } @@ -397,7 +401,7 @@ return skipTag; } - @Explain(displayName = "auto parallelism", normalExplain = false) + @Explain(displayName = "auto parallelism", explainLevels = { Level.EXTENDED }) public final boolean isAutoParallel() { return (this.reduceTraits.contains(ReducerTraits.AUTOPARALLEL)); } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java (working copy) @@ -24,6 +24,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import org.apache.commons.logging.Log; @@ -32,6 +33,7 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -153,7 +155,7 @@ return vectorMode ? "vectorized" : null; } - @Explain(displayName = "Reduce Operator Tree") + @Explain(displayName = "Reduce Operator Tree", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Operator getReducer() { return reducer; } @@ -162,7 +164,7 @@ this.reducer = reducer; } - @Explain(displayName = "Needs Tagging", normalExplain = false) + @Explain(displayName = "Needs Tagging", explainLevels = { Level.EXTENDED }) public boolean getNeedsTagging() { return needsTagging; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/RevokeDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/RevokeDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/RevokeDesc.java (working copy) @@ -20,8 +20,10 @@ import java.io.Serializable; import java.util.List; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName="Revoke") + +@Explain(displayName="Revoke", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class RevokeDesc extends DDLDesc implements Serializable, Cloneable { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/RoleDDLDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/RoleDDLDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/RoleDDLDesc.java (working copy) @@ -21,8 +21,10 @@ import java.io.Serializable; import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName = "Create Role") + +@Explain(displayName = "Create Role", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class RoleDDLDesc extends DDLDesc implements Serializable { private static final long 
serialVersionUID = 1L; @@ -108,12 +110,12 @@ this.roleOwnerName = roleOwnerName; } - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getName() { return name; } - @Explain(displayName = "role operation") + @Explain(displayName = "role operation", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public RoleOperation getOperation() { return operation; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/SMBJoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/SMBJoinDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SMBJoinDesc.java (working copy) @@ -23,8 +23,10 @@ import java.util.Map; import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName = "Sorted Merge Bucket Map Join Operator") + +@Explain(displayName = "Sorted Merge Bucket Map Join Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class SMBJoinDesc extends MapJoinDesc implements Serializable { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ScriptDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ScriptDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ScriptDesc.java (working copy) @@ -20,12 +20,14 @@ import org.apache.hadoop.hive.ql.exec.RecordReader; import org.apache.hadoop.hive.ql.exec.RecordWriter; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ScriptDesc. * */ -@Explain(displayName = "Transform Operator") +@Explain(displayName = "Transform Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ScriptDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; private String scriptCmd; @@ -59,7 +61,7 @@ this.scriptErrInfo = scriptErrInfo; } - @Explain(displayName = "command") + @Explain(displayName = "command", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getScriptCmd() { return scriptCmd; } @@ -68,7 +70,7 @@ this.scriptCmd = scriptCmd; } - @Explain(displayName = "output info") + @Explain(displayName = "output info", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public TableDesc getScriptOutputInfo() { return scriptOutputInfo; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java (working copy) @@ -20,13 +20,14 @@ import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * SelectDesc. 
* */ -@Explain(displayName = "Select Operator") +@Explain(displayName = "Select Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class SelectDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; private List colList; @@ -81,7 +82,7 @@ this.colList = colList; } - @Explain(displayName = "outputColumnNames") + @Explain(displayName = "outputColumnNames", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getOutputColumnNames() { return outputColumnNames; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowColumnsDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowColumnsDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowColumnsDesc.java (working copy) @@ -20,6 +20,7 @@ import java.io.Serializable; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; public class ShowColumnsDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -64,7 +65,7 @@ /** * @return the tableName */ - @Explain(displayName = "table name") + @Explain(displayName = "table name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { return tableName; } @@ -80,7 +81,7 @@ /** * @return the resFile */ - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFile() { return resFile; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowConfDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowConfDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowConfDesc.java (working copy) @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.plan; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import java.io.Serializable; @@ -41,7 +42,7 @@ this.confName = confName; } - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public Path getResFile() { return resFile; } @@ -50,7 +51,7 @@ this.resFile = resFile; } - @Explain(displayName = "conf name", normalExplain = false) + @Explain(displayName = "conf name", explainLevels = { Level.EXTENDED }) public String getConfName() { return confName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowCreateTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowCreateTableDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowCreateTableDesc.java (working copy) @@ -19,12 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ShowCreateTableDesc. 
* */ -@Explain(displayName = "Show Create Table") +@Explain(displayName = "Show Create Table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ShowCreateTableDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; String resFile; @@ -66,7 +68,7 @@ /** * @return the resFile */ - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFile() { return resFile; } @@ -82,7 +84,7 @@ /** * @return the tableName */ - @Explain(displayName = "table name") + @Explain(displayName = "table name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { return tableName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowDatabasesDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowDatabasesDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowDatabasesDesc.java (working copy) @@ -21,12 +21,14 @@ import java.io.Serializable; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ShowDatabasesDesc. * */ -@Explain(displayName = "Show Databases") +@Explain(displayName = "Show Databases", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ShowDatabasesDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; String pattern; @@ -89,7 +91,7 @@ /** * @return the resFile */ - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFile() { return resFile; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowFunctionsDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowFunctionsDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowFunctionsDesc.java (working copy) @@ -21,12 +21,14 @@ import java.io.Serializable; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ShowFunctionsDesc. * */ -@Explain(displayName = "Show Functions") +@Explain(displayName = "Show Functions", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ShowFunctionsDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; String pattern; @@ -103,7 +105,7 @@ /** * @return the resFile */ - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFile() { return resFile; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowGrantDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowGrantDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowGrantDesc.java (working copy) @@ -16,8 +16,10 @@ * limitations under the License. 
*/ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName="show grant desc") + +@Explain(displayName="show grant desc", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ShowGrantDesc { private PrincipalDesc principalDesc; @@ -48,7 +50,7 @@ return tabularSchema; } - @Explain(displayName="principal desc") + @Explain(displayName="principal desc", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public PrincipalDesc getPrincipalDesc() { return principalDesc; } @@ -57,7 +59,7 @@ this.principalDesc = principalDesc; } - @Explain(displayName="object") + @Explain(displayName="object", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public PrivilegeObjectDesc getHiveObj() { return hiveObj; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowIndexesDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowIndexesDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowIndexesDesc.java (working copy) @@ -21,12 +21,14 @@ import java.io.Serializable; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ShowIndexesDesc. * Returns table index information per SQL syntax. */ -@Explain(displayName = "Show Indexes") +@Explain(displayName = "Show Indexes", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ShowIndexesDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; String tableName; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowLocksDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowLocksDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowLocksDesc.java (working copy) @@ -23,12 +23,14 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ShowLocksDesc. 
* */ -@Explain(displayName = "Show Locks") +@Explain(displayName = "Show Locks", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ShowLocksDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; String resFile; @@ -105,7 +107,7 @@ /** * @return the tableName */ - @Explain(displayName = "table") + @Explain(displayName = "table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { return tableName; } @@ -121,7 +123,7 @@ /** * @return the partSpec */ - @Explain(displayName = "partition") + @Explain(displayName = "partition", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public HashMap getPartSpec() { return partSpec; } @@ -137,7 +139,7 @@ /** * @return the resFile */ - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFile() { return resFile; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowPartitionsDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowPartitionsDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowPartitionsDesc.java (working copy) @@ -22,12 +22,14 @@ import java.util.Map; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ShowPartitionsDesc. * */ -@Explain(displayName = "Show Partitions") +@Explain(displayName = "Show Partitions", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ShowPartitionsDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; String tabName; @@ -71,7 +73,7 @@ /** * @return the name of the table. */ - @Explain(displayName = "table") + @Explain(displayName = "table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTabName() { return tabName; } @@ -87,7 +89,7 @@ /** * @return the name of the table. */ - @Explain(displayName = "partSpec") + @Explain(displayName = "partSpec", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map getPartSpec() { return partSpec; } @@ -102,7 +104,7 @@ /** * @return the results file */ - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFile() { return resFile; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowTableStatusDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowTableStatusDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowTableStatusDesc.java (working copy) @@ -22,12 +22,14 @@ import java.util.HashMap; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ShowTableStatusDesc. 
* */ -@Explain(displayName = "Show Table Status") +@Explain(displayName = "Show Table Status", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ShowTableStatusDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; String pattern; @@ -108,7 +110,7 @@ return resFile; } - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFileString() { return getResFile(); } @@ -124,7 +126,7 @@ /** * @return the database name */ - @Explain(displayName = "database") + @Explain(displayName = "database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getDbName() { return dbName; } @@ -140,7 +142,7 @@ /** * @return the partSpec */ - @Explain(displayName = "partition") + @Explain(displayName = "partition", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public HashMap getPartSpec() { return partSpec; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowTablesDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowTablesDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowTablesDesc.java (working copy) @@ -21,12 +21,14 @@ import java.io.Serializable; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ShowTablesDesc. * */ -@Explain(displayName = "Show Tables") +@Explain(displayName = "Show Tables", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ShowTablesDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; String pattern; @@ -98,7 +100,7 @@ /** * @return the resFile */ - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFile() { return resFile; } @@ -114,7 +116,7 @@ /** * @return the dbName */ - @Explain(displayName = "database name") + @Explain(displayName = "database name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getDbName() { return dbName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ShowTblPropertiesDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ShowTblPropertiesDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ShowTblPropertiesDesc.java (working copy) @@ -22,12 +22,14 @@ import java.util.HashMap; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ShowTblPropertiesDesc. 
* */ -@Explain(displayName = "Show Table Properties") +@Explain(displayName = "Show Table Properties", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class ShowTblPropertiesDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; String resFile; @@ -77,7 +79,7 @@ return resFile; } - @Explain(displayName = "result file", normalExplain = false) + @Explain(displayName = "result file", explainLevels = { Level.EXTENDED }) public String getResFileString() { return getResFile(); } @@ -93,7 +95,7 @@ /** * @return the tableName */ - @Explain(displayName = "table name") + @Explain(displayName = "table name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { return tableName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java (working copy) @@ -22,6 +22,7 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.stats.StatsUtils; import com.google.common.collect.Lists; @@ -101,7 +102,7 @@ } @Override - @Explain(displayName = "Statistics") + @Explain(displayName = "Statistics", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String toString() { StringBuilder sb = new StringBuilder(); sb.append("Num rows: "); Index: ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java (working copy) @@ -22,12 +22,14 @@ import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * Client-side stats aggregator task. */ -@Explain(displayName = "Stats-Aggr Operator") +@Explain(displayName = "Stats-Aggr Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class StatsNoJobWork implements Serializable { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java (working copy) @@ -22,12 +22,14 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * ConditionalStats. 
* */ -@Explain(displayName = "Stats-Aggr Operator") +@Explain(displayName = "Stats-Aggr Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class StatsWork implements Serializable { private static final long serialVersionUID = 1L; @@ -90,7 +92,7 @@ aggKey = aggK; } - @Explain(displayName = "Stats Aggregation Key Prefix", normalExplain = false) + @Explain(displayName = "Stats Aggregation Key Prefix", explainLevels = { Level.EXTENDED }) public String getAggKey() { return aggKey; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/SwitchDatabaseDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/SwitchDatabaseDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SwitchDatabaseDesc.java (working copy) @@ -19,12 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * SwitchDatabaseDesc. * */ -@Explain(displayName = "Switch Database") +@Explain(displayName = "Switch Database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class SwitchDatabaseDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -39,7 +41,7 @@ this.databaseName = databaseName; } - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getDatabaseName() { return databaseName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java (working copy) @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -117,7 +118,7 @@ return properties; } - @Explain(displayName = "properties", normalExplain = false) + @Explain(displayName = "properties", explainLevels = { Level.EXTENDED }) public Map getPropertiesExplain() { return HiveStringUtils.getPropertiesExplain(getProperties()); } @@ -130,7 +131,7 @@ this.jobProperties = jobProperties; } - @Explain(displayName = "jobProperties", normalExplain = false) + @Explain(displayName = "jobProperties", explainLevels = { Level.EXTENDED }) public Map getJobProperties() { return jobProperties; } @@ -138,23 +139,23 @@ /** * @return the serdeClassName */ - @Explain(displayName = "serde") + @Explain(displayName = "serde", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getSerdeClassName() { return properties.getProperty(serdeConstants.SERIALIZATION_LIB); } - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { return properties .getProperty(hive_metastoreConstants.META_TABLE_NAME); } - @Explain(displayName = "input format") + @Explain(displayName = "input format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getInputFileFormatClassName() { return getInputFileFormatClass().getName(); } - @Explain(displayName = "output format") + 
@Explain(displayName = "output format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getOutputFileFormatClassName() { return getOutputFileFormatClass().getName(); } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java (working copy) @@ -27,13 +27,15 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.TableSample; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * Table Scan Descriptor Currently, data is only read from a base source as part * of map-reduce framework. So, nothing is stored in the descriptor. But, more * things will be added here as table scan is invoked as part of local work. **/ -@Explain(displayName = "TableScan") +@Explain(displayName = "TableScan", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class TableScanDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; @@ -125,7 +127,7 @@ return new TableScanDesc(getAlias(), vcs, this.tableMetadata); } - @Explain(displayName = "alias") + @Explain(displayName = "alias", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getAlias() { return alias; } @@ -193,7 +195,7 @@ this.gatherStats = gatherStats; } - @Explain(displayName = "GatherStats", normalExplain = false) + @Explain(displayName = "GatherStats", explainLevels = { Level.EXTENDED }) public boolean isGatherStats() { return gatherStats; } @@ -218,7 +220,7 @@ statsAggKeyPrefix = k; } - @Explain(displayName = "Statistics Aggregation Key Prefix", normalExplain = false) + @Explain(displayName = "Statistics Aggregation Key Prefix", explainLevels = { Level.EXTENDED }) public String getStatsAggPrefix() { return statsAggKeyPrefix; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java (working copy) @@ -36,7 +36,9 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * TezWork. This class encapsulates all the work objects that can be executed * in a single tez job. 
Currently it's basically a tree with MapWork at the @@ -44,7 +46,7 @@ * */ @SuppressWarnings("serial") -@Explain(displayName = "Tez") +@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class TezWork extends AbstractOperatorDesc { public enum VertexType { @@ -87,7 +89,7 @@ /** * getWorkMap returns a map of "vertex name" to BaseWork */ - @Explain(displayName = "Vertices") + @Explain(displayName = "Vertices", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map getWorkMap() { Map result = new LinkedHashMap(); for (BaseWork w: getAllWork()) { @@ -286,7 +288,7 @@ } } - @Explain(displayName = "Edges") + @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map> getDependencyMap() { Map> result = new LinkedHashMap>(); for (Map.Entry> entry: invertedWorkGraph.entrySet()) { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java (working copy) @@ -22,11 +22,13 @@ import java.util.Map; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * Truncates managed table or partition */ -@Explain(displayName = "Truncate Table or Partition") +@Explain(displayName = "Truncate Table or Partition", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class TruncateTableDesc extends DDLDesc { private static final long serialVersionUID = 1L; @@ -46,7 +48,7 @@ this.partSpec = partSpec; } - @Explain(displayName = "TableName") + @Explain(displayName = "TableName", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { return tableName; } @@ -55,7 +57,7 @@ this.tableName = tableName; } - @Explain(displayName = "Partition Spec") + @Explain(displayName = "Partition Spec", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public Map getPartSpec() { return partSpec; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/UDTFDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/UDTFDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/UDTFDesc.java (working copy) @@ -19,14 +19,16 @@ package org.apache.hadoop.hive.ql.plan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * All member variables should have a setters and getters of the form get and set or else they won't be recreated properly at run * time. 
* */ -@Explain(displayName = "UDTF Operator") +@Explain(displayName = "UDTF Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class UDTFDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; @@ -49,7 +51,7 @@ this.genericUDTF = genericUDTF; } - @Explain(displayName = "function name") + @Explain(displayName = "function name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getUDTFName() { return genericUDTF.toString(); } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/UnionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/UnionDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/UnionDesc.java (working copy) @@ -17,13 +17,15 @@ */ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * unionDesc is a empty class currently. However, union has more than one input * (as compared with forward), and therefore, we need a separate class. **/ -@Explain(displayName = "Union") +@Explain(displayName = "Union", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class UnionDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; private transient int numInputs; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/UnionWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/UnionWork.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/UnionWork.java (working copy) @@ -25,7 +25,9 @@ import java.util.Map; import java.util.Set; import java.util.HashSet; + import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.mapred.JobConf; @@ -47,8 +49,8 @@ super(name); } - @Explain(displayName = "Vertex") @Override + @Explain(displayName = "Vertex", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getName() { return super.getName(); } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/UnlockDatabaseDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/UnlockDatabaseDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/UnlockDatabaseDesc.java (working copy) @@ -19,12 +19,14 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * UnlockDatabaseDesc. 
* */ -@Explain(displayName = "Unlock Database") +@Explain(displayName = "Unlock Database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class UnlockDatabaseDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; @@ -34,7 +36,7 @@ this.databaseName = databaseName; } - @Explain(displayName = "database") + @Explain(displayName = "database", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getDatabaseName() { return databaseName; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/UnlockTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/UnlockTableDesc.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/UnlockTableDesc.java (working copy) @@ -22,12 +22,14 @@ import java.util.Map; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + /** * UnlockTableDesc. * */ -@Explain(displayName = "Unlock Table") +@Explain(displayName = "Unlock Table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class UnlockTableDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PTFExpressionDef.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PTFExpressionDef.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PTFExpressionDef.java (working copy) @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.PTFUtils; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; public class PTFExpressionDef { @@ -59,7 +60,7 @@ this.exprNode = exprNode; } - @Explain(displayName = "expr") + @Explain(displayName = "expr", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getExprNodeExplain() { return exprNode == null ? 
null : exprNode.getExprString(); } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PTFQueryInputDef.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PTFQueryInputDef.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PTFQueryInputDef.java (working copy) @@ -20,13 +20,14 @@ import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType; import org.apache.hadoop.hive.ql.plan.Explain; +import org.apache.hadoop.hive.ql.plan.Explain.Level; -@Explain(displayName = "Input definition") +@Explain(displayName = "Input definition", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class PTFQueryInputDef extends PTFInputDef { private String destination; private PTFQueryInputType type; - @Explain(displayName = "destination") + @Explain(displayName = "destination", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getDestination() { return destination; } @@ -43,7 +44,7 @@ this.type = type; } - @Explain(displayName = "type") + @Explain(displayName = "type", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTypeExplain() { return type.name(); } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java (working copy) @@ -23,9 +23,10 @@ import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec; import org.apache.hadoop.hive.ql.plan.Explain; +import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator; -@Explain(displayName = "Partition table definition") +@Explain(displayName = "Partition table definition", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class PartitionedTableFunctionDef extends PTFInputDef { private String name; private String resolverClassName; @@ -40,7 +41,7 @@ private transient List referencedColumns; - @Explain(displayName = "name") + @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getName() { return name; } @@ -83,7 +84,7 @@ return partition; } - @Explain(displayName = "partition by") + @Explain(displayName = "partition by", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getPartitionExplain() { if (partition == null || partition.getExpressions() == null) { return null; @@ -110,7 +111,7 @@ this.order = order; } - @Explain(displayName = "order by") + @Explain(displayName = "order by", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getOrderExplain() { if (order == null || order.getExpressions() == null) { return null; @@ -144,7 +145,7 @@ this.args = args; } - @Explain(displayName = "arguments") + @Explain(displayName = "arguments", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getArgsExplain() { if (args == null) { return null; @@ -188,7 +189,7 @@ this.resolverClassName = resolverClassName; } - @Explain(displayName = "referenced columns") + @Explain(displayName = "referenced columns", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public List getReferencedColumns() { return referencedColumns; } Index: 
ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java (working copy) @@ -361,19 +361,22 @@ final FileSystem fs = path.getFileSystem(conf); - if (fs.exists(path)) { - checkPermissions(fs, path, actions, authenticator.getUserName()); + FileStatus pathStatus = FileUtils.getFileStatusOrNull(fs, path); + if (pathStatus != null) { + checkPermissions(fs, pathStatus, actions, authenticator.getUserName()); } else if (path.getParent() != null) { // find the ancestor which exists to check its permissions Path par = path.getParent(); + FileStatus parStatus = null; while (par != null) { - if (fs.exists(par)) { + parStatus = FileUtils.getFileStatusOrNull(fs, par); + if (parStatus != null) { break; } par = par.getParent(); } - checkPermissions(fs, par, actions, authenticator.getUserName()); + checkPermissions(fs, parStatus, actions, authenticator.getUserName()); } } @@ -382,18 +385,20 @@ * does not exists, it returns. */ @SuppressWarnings("deprecation") - protected static void checkPermissions(final FileSystem fs, final Path path, + protected static void checkPermissions(final FileSystem fs, final FileStatus stat, final EnumSet actions, String user) throws IOException, AccessControlException, HiveException { - try { - FileStatus stat = fs.getFileStatus(path); - for (FsAction action : actions) { - FileUtils.checkFileAccessWithImpersonation(fs, stat, action, user); - } - } catch (FileNotFoundException fnfe) { + if (stat == null) { // File named by path doesn't exist; nothing to validate. return; + } + FsAction checkActions = FsAction.NONE; + for (FsAction action : actions) { + checkActions = checkActions.or(action); + } + try { + FileUtils.checkFileAccessWithImpersonation(fs, stat, checkActions, user); } catch (org.apache.hadoop.fs.permission.AccessControlException ace) { // Older hadoop version will throw this @deprecated Exception. throw accessControlException(ace); Index: ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java (working copy) @@ -385,8 +385,7 @@ FileSystem fs; try { fs = FileSystem.get(filePath.toUri(), conf); - Path path = FileUtils.getPathOrParentThatExists(fs, filePath); - FileStatus fileStatus = fs.getFileStatus(path); + FileStatus fileStatus = FileUtils.getPathOrParentThatExists(fs, filePath); if (FileUtils.isOwnerOfFileHierarchy(fs, fileStatus, userName)) { availPrivs.addPrivilege(SQLPrivTypeGrant.OWNER_PRIV); } Index: ql/src/java/org/apache/hadoop/hive/ql/session/DependencyResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/session/DependencyResolver.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/session/DependencyResolver.java (working copy) @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
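The checkPermissions rewrite above now receives a pre-fetched FileStatus instead of re-stat-ing the path, and it folds the requested EnumSet of FsActions into a single combined action via or(), so the impersonated access check runs once against the union of permissions rather than once per action. A small standalone illustration of just the folding step, using only the Hadoop FsAction enum (FsActionUnionSketch and union are hypothetical names), assuming this sketch is a fair reduction of that change:

import java.util.EnumSet;
import org.apache.hadoop.fs.permission.FsAction;

public class FsActionUnionSketch {
  // Fold a set of requested actions into one FsAction, as the patched
  // checkPermissions does before the single access check.
  static FsAction union(EnumSet<FsAction> actions) {
    FsAction combined = FsAction.NONE;
    for (FsAction a : actions) {
      combined = combined.or(a);
    }
    return combined;
  }

  public static void main(String[] args) {
    System.out.println(union(EnumSet.of(FsAction.READ, FsAction.WRITE)));   // READ_WRITE
    System.out.println(union(EnumSet.of(FsAction.READ, FsAction.EXECUTE))); // READ_EXECUTE
  }
}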
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.session; + +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.io.File; +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import groovy.grape.Grape; +import groovy.grape.GrapeIvy; +import groovy.lang.GroovyClassLoader; + + +public class DependencyResolver { + + private static final String HIVE_HOME = "HIVE_HOME"; + private static final String HIVE_CONF_DIR = "HIVE_CONF_DIR"; + private String ivysettingsPath; + private static LogHelper _console = new LogHelper(LogFactory.getLog("DependencyResolver")); + + public DependencyResolver() { + + // Check if HIVE_CONF_DIR is defined + if (System.getenv().containsKey(HIVE_CONF_DIR)) { + ivysettingsPath = System.getenv().get(HIVE_CONF_DIR) + "/ivysettings.xml"; + } + + // If HIVE_CONF_DIR is not defined or file is not found in HIVE_CONF_DIR then check HIVE_HOME/conf + if (ivysettingsPath == null || !(new File(ivysettingsPath).exists())) { + if (System.getenv().containsKey(HIVE_HOME)) { + ivysettingsPath = System.getenv().get(HIVE_HOME) + "/conf/ivysettings.xml"; + } + } + + // If HIVE_HOME is not defined or file is not found in HIVE_HOME/conf then load default ivysettings.xml from class loader + if (ivysettingsPath == null || !(new File(ivysettingsPath).exists())) { + URL ivysetttingsResource = ClassLoader.getSystemResource("ivysettings.xml"); + if (ivysetttingsResource != null){ + ivysettingsPath = ivysetttingsResource.getFile(); + _console.printInfo("ivysettings.xml file not found in HIVE_HOME or HIVE_CONF_DIR," + ivysettingsPath + " will be used"); + } + } + + } + + /** + * + * @param uri + * @return List of URIs of downloaded jars + * @throws URISyntaxException + * @throws IOException + */ + public List downloadDependencies(URI uri) throws URISyntaxException, IOException { + Map dependencyMap = new HashMap(); + String authority = uri.getAuthority(); + if (authority == null) { + throw new URISyntaxException(authority, "Invalid url: Expected 'org:module:version', found null"); + } + String[] authorityTokens = authority.toLowerCase().split(":"); + + if (authorityTokens.length != 3) { + throw new URISyntaxException(authority, "Invalid url: Expected 'org:module:version', found " + authority); + } + + dependencyMap.put("org", authorityTokens[0]); + dependencyMap.put("module", authorityTokens[1]); + dependencyMap.put("version", authorityTokens[2]); + Map queryMap = parseQueryString(uri.getQuery()); + if (queryMap != null) { + dependencyMap.putAll(queryMap); + } + return grab(dependencyMap); + } + + /** + * @param queryString + 
* @return queryMap Map which contains grape parameters such as transitive, exclude, ext and classifier. + * Example: Input: ext=jar&exclude=org.mortbay.jetty:jetty&transitive=true + * Output: {[ext]:[jar], [exclude]:{[group]:[org.mortbay.jetty], [module]:[jetty]}, [transitive]:[true]} + * @throws URISyntaxException + */ + private Map parseQueryString(String queryString) throws URISyntaxException { + if (queryString == null || queryString.isEmpty()) { + return null; + } + List> excludeList = new LinkedList>(); + Map queryMap = new HashMap(); + String[] mapTokens = queryString.split("&"); + for (String tokens : mapTokens) { + String[] mapPair = tokens.split("="); + if (mapPair.length != 2) { + throw new RuntimeException("Invalid query string: " + queryString); + } + if (mapPair[0].equals("exclude")) { + excludeList.addAll(computeExcludeList(mapPair[1])); + } else if (mapPair[0].equals("transitive")) { + if (mapPair[1].toLowerCase().equals("true")) { + queryMap.put(mapPair[0], true); + } else { + queryMap.put(mapPair[0], false); + } + } else { + queryMap.put(mapPair[0], mapPair[1]); + } + } + if (!excludeList.isEmpty()) { + queryMap.put("exclude", excludeList); + } + return queryMap; + } + + private List> computeExcludeList(String excludeString) throws URISyntaxException { + String excludes[] = excludeString.split(","); + List> excludeList = new LinkedList>(); + for (String exclude : excludes) { + Map tempMap = new HashMap(); + String args[] = exclude.split(":"); + if (args.length != 2) { + throw new URISyntaxException(excludeString, + "Invalid exclude string: expected 'org:module,org:module,..', found " + excludeString); + } + tempMap.put("group", args[0]); + tempMap.put("module", args[1]); + excludeList.add(tempMap); + } + return excludeList; + } + + /** + * + * @param dependencies + * @return List of URIs of downloaded jars + * @throws IOException + */ + private List grab(Map dependencies) throws IOException { + Map args = new HashMap(); + URI[] localUrls; + + //grape expects excludes key in args map + if (dependencies.containsKey("exclude")) { + args.put("excludes", dependencies.get("exclude")); + } + + //Set transitive to true by default + if (!dependencies.containsKey("transitive")) { + dependencies.put("transitive", true); + } + + args.put("classLoader", new GroovyClassLoader()); + System.setProperty("grape.config", ivysettingsPath); + System.setProperty("groovy.grape.report.downloads", "true"); + localUrls = Grape.resolve(args, dependencies); + if (localUrls == null) { + throw new IOException("Not able to download all the dependencies.."); + } + return Arrays.asList(localUrls); + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/session/OperationLog.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/session/OperationLog.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/session/OperationLog.java (working copy) @@ -20,6 +20,7 @@ import org.apache.commons.io.FileUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.io.IOUtils; import java.io.*; @@ -36,12 +37,40 @@ private final String operationName; private final LogFile logFile; + private LoggingLevel opLoggingLevel = LoggingLevel.UNKNOWN; - public OperationLog(String name, File file) throws FileNotFoundException{ + public static enum LoggingLevel { + NONE, EXECUTION, PERFORMANCE, VERBOSE, UNKNOWN + } + + public OperationLog(String name, File file, 
HiveConf hiveConf) throws FileNotFoundException { operationName = name; logFile = new LogFile(file); + + if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) { + String logLevel = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL); + opLoggingLevel = getLoggingLevel(logLevel); + } } + public static LoggingLevel getLoggingLevel (String mode) { + if (mode.equalsIgnoreCase("none")) { + return LoggingLevel.NONE; + } else if (mode.equalsIgnoreCase("execution")) { + return LoggingLevel.EXECUTION; + } else if (mode.equalsIgnoreCase("verbose")) { + return LoggingLevel.VERBOSE; + } else if (mode.equalsIgnoreCase("performance")) { + return LoggingLevel.PERFORMANCE; + } else { + return LoggingLevel.UNKNOWN; + } + } + + public LoggingLevel getOpLoggingLevel() { + return opLoggingLevel; + } + /** * Singleton OperationLog object per thread. */ Index: ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (working copy) @@ -24,6 +24,7 @@ import java.io.InputStream; import java.io.PrintStream; import java.net.URI; +import java.net.URISyntaxException; import java.net.URLClassLoader; import java.sql.Timestamp; import java.util.ArrayList; @@ -32,6 +33,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -269,6 +271,9 @@ */ private Timestamp queryCurrentTimestamp; + private ResourceMaps resourceMaps; + + private DependencyResolver dependencyResolver; /** * Get the lineage state stored in this session. * @@ -334,6 +339,8 @@ this.userName = userName; isSilent = conf.getBoolVar(HiveConf.ConfVars.HIVESESSIONSILENT); ls = new LineageState(); + resourceMaps = new ResourceMaps(); + dependencyResolver = new DependencyResolver(); // Must be deterministic order map for consistent q-test output across Java versions overriddenConfigurations = new LinkedHashMap(); overriddenConfigurations.putAll(HiveConf.getConfSystemProperties()); @@ -1119,9 +1126,8 @@ return null; } - private final HashMap> resource_map = - new HashMap>(); + public String add_resource(ResourceType t, String value) throws RuntimeException { return add_resource(t, value, false); } @@ -1143,37 +1149,88 @@ public List add_resources(ResourceType t, Collection values, boolean convertToUnix) throws RuntimeException { - Set resourceMap = getResourceMap(t); - + Set resourceSet = resourceMaps.getResourceSet(t); + Map> resourcePathMap = resourceMaps.getResourcePathMap(t); + Map> reverseResourcePathMap = resourceMaps.getReverseResourcePathMap(t); List localized = new ArrayList(); try { for (String value : values) { - localized.add(downloadResource(value, convertToUnix)); + String key; + + //get the local path of downloaded jars. + List downloadedURLs = resolveAndDownload(t, value, convertToUnix); + + if (getURLType(value).equals("ivy")) { + // get the key to store in map + key = new URI(value).getAuthority(); + } else { + // for local file and hdfs, key and value are same. 
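As the comment above notes, an ivy:// resource is keyed by the URI authority, i.e. the org:module:version coordinate, while file and hdfs resources are keyed by the first localized path, so key and value coincide for those schemes. A minimal standalone sketch of that key derivation, assuming a hypothetical helper name (deriveKey) and made-up example coordinates, neither of which is part of SessionState:

    import java.net.URI;
    import java.net.URISyntaxException;
    import java.util.Arrays;
    import java.util.List;

    public class ResourceKeyExample {
      // ivy:// values are keyed by their authority ("org:module:version"); file/hdfs values
      // are keyed by the first (and only) downloaded URI, i.e. key == localized value.
      static String deriveKey(String value, List<URI> downloaded) throws URISyntaxException {
        URI uri = new URI(value);
        String scheme = uri.getScheme() == null ? "file" : uri.getScheme().toLowerCase();
        return "ivy".equals(scheme) ? uri.getAuthority() : downloaded.get(0).toString();
      }

      public static void main(String[] args) throws URISyntaxException {
        // prints: org.apache.commons:commons-lang3:3.4
        System.out.println(deriveKey("ivy://org.apache.commons:commons-lang3:3.4?transitive=true",
            Arrays.asList(new URI("file:///tmp/commons-lang3-3.4.jar"))));
        // prints: file:///tmp/my_udfs.jar
        System.out.println(deriveKey("file:///tmp/my_udfs.jar",
            Arrays.asList(new URI("file:///tmp/my_udfs.jar"))));
      }
    }

Keying ivy resources by coordinate rather than by downloaded path is what lets a later DELETE JAR of the same ivy://org:module:version find every jar that the original transitive download brought in.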
+ key = downloadedURLs.get(0).toString(); + } + Set downloadedValues = new HashSet(); + + for (URI uri : downloadedURLs) { + String resourceValue = uri.toString(); + downloadedValues.add(resourceValue); + localized.add(resourceValue); + if (reverseResourcePathMap.containsKey(resourceValue)) { + if (!reverseResourcePathMap.get(resourceValue).contains(key)) { + reverseResourcePathMap.get(resourceValue).add(key); + } + } else { + Set addSet = new HashSet(); + addSet.add(key); + reverseResourcePathMap.put(resourceValue, addSet); + + } + } + resourcePathMap.put(key, downloadedValues); } + t.preHook(resourceSet, localized); - t.preHook(resourceMap, localized); - } catch (RuntimeException e) { - getConsole().printError(e.getMessage(), "\n" - + org.apache.hadoop.util.StringUtils.stringifyException(e)); + getConsole().printError(e.getMessage(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); throw e; + } catch (URISyntaxException e) { + getConsole().printError(e.getMessage()); + throw new RuntimeException(e); + } catch (IOException e) { + getConsole().printError(e.getMessage()); + throw new RuntimeException(e); } - getConsole().printInfo("Added resources: " + values); - resourceMap.addAll(localized); - + resourceSet.addAll(localized); return localized; } - private Set getResourceMap(ResourceType t) { - Set result = resource_map.get(t); - if (result == null) { - result = new HashSet(); - resource_map.put(t, result); + private static String getURLType(String value) throws URISyntaxException { + URI uri = new URI(value); + String scheme = uri.getScheme() == null ? null : uri.getScheme().toLowerCase(); + if (scheme == null || scheme.equals("file")) { + return "file"; + } else if (scheme.equals("hdfs") || scheme.equals("ivy")) { + return scheme; + } else { + throw new RuntimeException("invalid url: " + uri + ", expecting ( file | hdfs | ivy) as url scheme. 
"); } - return result; } + List resolveAndDownload(ResourceType t, String value, boolean convertToUnix) throws URISyntaxException, + IOException { + URI uri = new URI(value); + if (getURLType(value).equals("file")) { + return Arrays.asList(uri); + } else if (getURLType(value).equals("ivy")) { + return dependencyResolver.downloadDependencies(uri); + } else if (getURLType(value).equals("hdfs")) { + return Arrays.asList(new URI(downloadResource(value, convertToUnix))); + } else { + throw new RuntimeException("Invalid url " + uri); + } + } + + + /** * Returns true if it is from any external File Systems except local */ @@ -1218,16 +1275,49 @@ return value; } - public void delete_resources(ResourceType t, List value) { - Set resources = resource_map.get(t); - if (resources != null && !resources.isEmpty()) { - t.postHook(resources, value); - resources.removeAll(value); + public void delete_resources(ResourceType t, List values) { + Set resources = resourceMaps.getResourceSet(t); + if (resources == null || resources.isEmpty()) { + return; } + + Map> resourcePathMap = resourceMaps.getResourcePathMap(t); + Map> reverseResourcePathMap = resourceMaps.getReverseResourcePathMap(t); + List deleteList = new LinkedList(); + for (String value : values) { + String key = value; + try { + if (getURLType(value).equals("ivy")) { + key = new URI(value).getAuthority(); + } + } catch (URISyntaxException e) { + throw new RuntimeException("Invalid uri string " + value + ", " + e.getMessage()); + } + + // get all the dependencies to delete + + Set resourcePaths = resourcePathMap.get(key); + if (resourcePaths == null) { + return; + } + for (String resourceValue : resourcePaths) { + reverseResourcePathMap.get(resourceValue).remove(key); + + // delete a dependency only if no other resource depends on it. + if (reverseResourcePathMap.get(resourceValue).isEmpty()) { + deleteList.add(resourceValue); + reverseResourcePathMap.remove(resourceValue); + } + } + resourcePathMap.remove(key); + } + t.postHook(resources, deleteList); + resources.removeAll(deleteList); } + public Set list_resource(ResourceType t, List filter) { - Set orig = resource_map.get(t); + Set orig = resourceMaps.getResourceSet(t); if (orig == null) { return null; } @@ -1245,10 +1335,10 @@ } public void delete_resources(ResourceType t) { - Set resources = resource_map.get(t); + Set resources = resourceMaps.getResourceSet(t); if (resources != null && !resources.isEmpty()) { delete_resources(t, new ArrayList(resources)); - resource_map.remove(t); + resourceMaps.getResourceMap().remove(t); } } @@ -1512,3 +1602,51 @@ return queryCurrentTimestamp; } } + +class ResourceMaps { + + private final Map> resource_map; + //Given jar to add is stored as key and all its transitive dependencies as value. Used for deleting transitive dependencies. + private final Map>> resource_path_map; + // stores all the downloaded resources as key and the jars which depend on these resources as values in form of a list. Used for deleting transitive dependencies. 
+ private final Map>> reverse_resource_path_map; + + public ResourceMaps() { + resource_map = new HashMap>(); + resource_path_map = new HashMap>>(); + reverse_resource_path_map = new HashMap>>(); + + } + + public Map> getResourceMap() { + return resource_map; + } + + public Set getResourceSet(SessionState.ResourceType t) { + Set result = resource_map.get(t); + if (result == null) { + result = new HashSet(); + resource_map.put(t, result); + } + return result; + } + + public Map> getResourcePathMap(SessionState.ResourceType t) { + Map> result = resource_path_map.get(t); + if (result == null) { + result = new HashMap>(); + resource_path_map.put(t, result); + } + return result; + } + + public Map> getReverseResourcePathMap(SessionState.ResourceType t) { + Map> result = reverse_resource_path_map.get(t); + if (result == null) { + result = new HashMap>(); + reverse_resource_path_map.put(t, result); + } + return result; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsAggregator.java (working copy) @@ -82,7 +82,7 @@ Utilities.SQLCommand setQueryTimeout = new Utilities.SQLCommand() { @Override public Void run(PreparedStatement stmt) throws SQLException { - stmt.setQueryTimeout(timeout); + Utilities.setQueryTimeout(stmt, timeout); return null; } }; Index: ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/stats/jdbc/JDBCStatsPublisher.java (working copy) @@ -82,7 +82,7 @@ Utilities.SQLCommand setQueryTimeout = new Utilities.SQLCommand() { @Override public Void run(PreparedStatement stmt) throws SQLException { - stmt.setQueryTimeout(timeout); + Utilities.setQueryTimeout(stmt, timeout); return null; } }; @@ -279,7 +279,7 @@ conn = DriverManager.getConnection(connectionString); stmt = conn.createStatement(); - stmt.setQueryTimeout(timeout); + Utilities.setQueryTimeout(stmt, timeout); // TODO: why is this not done using Hive db scripts? // Check if the table exists Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java (working copy) @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDayOfMonthLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDayOfMonthString; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -38,9 +39,12 @@ * */ @Description(name = "day,dayofmonth", - value = "_FUNC_(date) - Returns the date of the month of date", - extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " - + "'yyyy-MM-dd'.\n" + value = "_FUNC_(param) - Returns the day of the month of date/timestamp, or day component of interval", + extended = "param can be one of:\n" + + "1. 
A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'.\n" + + "2. A date value\n" + + "3. A timestamp value\n" + + "4. A day-time interval value" + "Example:\n " + " > SELECT _FUNC_('2009-07-30') FROM src LIMIT 1;\n" + " 30") @VectorizedExpressions({VectorUDFDayOfMonthLong.class, VectorUDFDayOfMonthString.class}) @@ -98,4 +102,12 @@ return result; } + public IntWritable evaluate(HiveIntervalDayTimeWritable i) { + if (i == null) { + return null; + } + + result.set(i.getHiveIntervalDayTime().getDays()); + return result; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java (working copy) @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourString; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -37,9 +38,11 @@ * */ @Description(name = "hour", - value = "_FUNC_(date) - Returns the hour of date", - extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " - + "'HH:mm:ss'.\n" + value = "_FUNC_(param) - Returns the hour component of the string/timestamp/interval", + extended = "param can be one of:\n" + + "1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'.\n" + + "2. A timestamp value\n" + + "3. A day-time interval value" + "Example:\n " + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 12\n" @@ -95,4 +98,12 @@ return result; } + public IntWritable evaluate(HiveIntervalDayTimeWritable i) { + if (i == null) { + return null; + } + + result.set(i.getHiveIntervalDayTime().getHours()); + return result; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java (working copy) @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteString; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -37,9 +38,11 @@ * */ @Description(name = "minute", - value = "_FUNC_(date) - Returns the minute of date", - extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " - + "'HH:mm:ss'.\n" + value = "_FUNC_(param) - Returns the minute component of the string/timestamp/interval", + extended = "param can be one of:\n" + + "1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'.\n" + + "2. A timestamp value\n" + + "3.
A day-time interval value" + "Example:\n " + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 58\n" @@ -95,4 +98,12 @@ return result; } + public IntWritable evaluate(HiveIntervalDayTimeWritable i) { + if (i == null) { + return null; + } + + result.set(i.getHiveIntervalDayTime().getMinutes()); + return result; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java (working copy) @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthString; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -38,8 +39,13 @@ * */ @Description(name = "month", - value = "_FUNC_(date) - Returns the month of date", - extended = "Example:\n" + value = "_FUNC_(param) - Returns the month component of the date/timestamp/interval", + extended = "param can be one of:\n" + + "1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'.\n" + + "2. A date value\n" + + "3. A timestamp value\n" + + "4. A year-month interval value" + + "Example:\n" + " > SELECT _FUNC_('2009-07-30') FROM src LIMIT 1;\n" + " 7") @VectorizedExpressions({VectorUDFMonthLong.class, VectorUDFMonthString.class}) public class UDFMonth extends UDF { @@ -94,4 +100,12 @@ return result; } + public IntWritable evaluate(HiveIntervalYearMonthWritable i) { + if (i == null) { + return null; + } + + result.set(i.getHiveIntervalYearMonth().getMonths()); + return result; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java (working copy) @@ -23,23 +23,29 @@ import java.util.Calendar; import java.util.Date; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondString; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateUtils; /** * UDFSecond. * */ @Description(name = "second", - value = "_FUNC_(date) - Returns the second of date", - extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " - + "'HH:mm:ss'.\n" + value = "_FUNC_(date) - Returns the second component of the string/timestamp/interval", + extended = "param can be one of:\n" + + "1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'.\n" + + "2. A timestamp value\n" + + "3. 
A day-time interval value" + "Example:\n " + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 59\n" @@ -96,4 +102,13 @@ return result; } + public IntWritable evaluate(HiveIntervalDayTimeWritable i) { + if (i == null) { + return null; + } + + HiveIntervalDayTime idt = i.getHiveIntervalDayTime(); + result.set(idt.getSeconds()); + return result; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java (working copy) @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearString; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -38,9 +39,12 @@ * */ @Description(name = "year", - value = "_FUNC_(date) - Returns the year of date", - extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " - + "'yyyy-MM-dd'.\n" + value = "_FUNC_(param) - Returns the year component of the date/timestamp/interval", + extended = "param can be one of:\n" + + "1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'.\n" + + "2. A date value\n" + + "3. A timestamp value\n" + + "4. A year-month interval value" + "Example:\n " + " > SELECT _FUNC_('2009-07-30') FROM src LIMIT 1;\n" + " 2009") @VectorizedExpressions({VectorUDFYearLong.class, VectorUDFYearString.class}) @@ -98,4 +102,12 @@ return result; } + public IntWritable evaluate(HiveIntervalYearMonthWritable i) { + if (i == null) { + return null; + } + + result.set(i.getHiveIntervalYearMonth().getYears()); + return result; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java (working copy) @@ -501,6 +501,10 @@ return null; } Object writableValue = converters[i].convert(obj); + // if string can not be parsed converter will return null + if (writableValue == null) { + return null; + } Timestamp ts = ((TimestampWritable) writableValue).getTimestamp(); return ts; } Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFormatNumber.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFormatNumber.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFormatNumber.java (working copy) @@ -147,8 +147,14 @@ @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - int dValue = ((IntObjectInspector) argumentOIs[1]).get(arguments[1].get()); + Object arg0; + Object arg1; + if ((arg0 = arguments[0].get()) == null || (arg1 = arguments[1].get()) == null) { + return null; + } + int dValue = ((IntObjectInspector) argumentOIs[1]).get(arg1); + if (dValue < 0) { throw new HiveException("Argument 2 of function FORMAT_NUMBER must be >= 0, but \"" + dValue + "\" was found"); @@ -181,26 +187,26 @@ switch (xObjectInspector.getPrimitiveCategory()) { case VOID: case DOUBLE: - xDoubleValue = ((DoubleObjectInspector) 
argumentOIs[0]).get(arguments[0].get()); + xDoubleValue = ((DoubleObjectInspector) argumentOIs[0]).get(arg0); resultText.set(numberFormat.format(xDoubleValue)); break; case FLOAT: - xFloatValue = ((FloatObjectInspector) argumentOIs[0]).get(arguments[0].get()); + xFloatValue = ((FloatObjectInspector) argumentOIs[0]).get(arg0); resultText.set(numberFormat.format(xFloatValue)); break; case DECIMAL: xDecimalValue = ((HiveDecimalObjectInspector) argumentOIs[0]) - .getPrimitiveJavaObject(arguments[0].get()); + .getPrimitiveJavaObject(arg0); resultText.set(numberFormat.format(xDecimalValue.bigDecimalValue())); break; case BYTE: case SHORT: case INT: - xIntValue = ((IntObjectInspector) argumentOIs[0]).get(arguments[0].get()); + xIntValue = ((IntObjectInspector) argumentOIs[0]).get(arg0); resultText.set(numberFormat.format(xIntValue)); break; case LONG: - xLongValue = ((LongObjectInspector) argumentOIs[0]).get(arguments[0].get()); + xLongValue = ((LongObjectInspector) argumentOIs[0]).get(arg0); resultText.set(numberFormat.format(xLongValue)); break; default: Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java (working copy) @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import static java.math.BigDecimal.ROUND_HALF_UP; +import static java.util.Calendar.DATE; +import static java.util.Calendar.HOUR_OF_DAY; +import static java.util.Calendar.MINUTE; +import static java.util.Calendar.MONTH; +import static java.util.Calendar.SECOND; +import static java.util.Calendar.YEAR; +import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP; +import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP; + +import java.math.BigDecimal; +import java.util.Calendar; +import java.util.Date; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** + * UDFMonthsBetween. + * + */ +@Description(name = "months_between", value = "_FUNC_(date1, date2) - returns number of months between dates date1 and date2", + extended = "If date1 is later than date2, then the result is positive. " + + "If date1 is earlier than date2, then the result is negative. " + + "If date1 and date2 are either the same days of the month or both last days of months, " + + "then the result is always an integer. " + + "Otherwise the UDF calculates the fractional portion of the result based on a 31-day " + + "month and considers the difference in time components date1 and date2.\n" + + "date1 and date2 type can be date, timestamp or string in the format " + + "'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss'. 
" + + "The result is rounded to 8 decimal places.\n" + + " Example:\n" + + " > SELECT _FUNC_('1997-02-28 10:30:00', '1996-10-30');\n 3.94959677") +public class GenericUDFMonthsBetween extends GenericUDF { + private transient Converter[] tsConverters = new Converter[2]; + private transient PrimitiveCategory[] tsInputTypes = new PrimitiveCategory[2]; + private transient Converter[] dtConverters = new Converter[2]; + private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[2]; + private final Calendar cal1 = Calendar.getInstance(); + private final Calendar cal2 = Calendar.getInstance(); + private final DoubleWritable output = new DoubleWritable(); + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + checkArgsSize(arguments, 2, 2); + + checkArgPrimitive(arguments, 0); + checkArgPrimitive(arguments, 1); + + // the function should support both short date and full timestamp format + // time part of the timestamp should not be skipped + checkArgGroups(arguments, 0, tsInputTypes, STRING_GROUP, DATE_GROUP); + checkArgGroups(arguments, 1, tsInputTypes, STRING_GROUP, DATE_GROUP); + + checkArgGroups(arguments, 0, dtInputTypes, STRING_GROUP, DATE_GROUP); + checkArgGroups(arguments, 1, dtInputTypes, STRING_GROUP, DATE_GROUP); + + obtainTimestampConverter(arguments, 0, tsInputTypes, tsConverters); + obtainTimestampConverter(arguments, 1, tsInputTypes, tsConverters); + + obtainDateConverter(arguments, 0, dtInputTypes, dtConverters); + obtainDateConverter(arguments, 1, dtInputTypes, dtConverters); + + ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + // the function should support both short date and full timestamp format + // time part of the timestamp should not be skipped + Date date1 = getTimestampValue(arguments, 0, tsConverters); + if (date1 == null) { + date1 = getDateValue(arguments, 0, dtInputTypes, dtConverters); + if (date1 == null) { + return null; + } + } + + Date date2 = getTimestampValue(arguments, 1, tsConverters); + if (date2 == null) { + date2 = getDateValue(arguments, 1, dtInputTypes, dtConverters); + if (date2 == null) { + return null; + } + } + + cal1.setTime(date1); + cal2.setTime(date2); + + // skip day/time part if both dates are end of the month + // or the same day of the month + int monDiffInt = (cal1.get(YEAR) - cal2.get(YEAR)) * 12 + (cal1.get(MONTH) - cal2.get(MONTH)); + if (cal1.get(DATE) == cal2.get(DATE) + || (cal1.get(DATE) == cal1.getActualMaximum(DATE) && cal2.get(DATE) == cal2 + .getActualMaximum(DATE))) { + output.set(monDiffInt); + return output; + } + + int sec1 = getDayPartInSec(cal1); + int sec2 = getDayPartInSec(cal2); + + // 1 sec is 0.000000373 months (1/2678400). 1 month is 31 days. + // there should be no adjustments for leap seconds + double monBtwDbl = monDiffInt + (sec1 - sec2) / 2678400D; + // Round a double to 8 decimal places. 
+ double result = BigDecimal.valueOf(monBtwDbl).setScale(8, ROUND_HALF_UP).doubleValue(); + output.set(result); + return output; + } + + protected int getDayPartInSec(Calendar cal) { + int dd = cal.get(DATE); + int HH = cal.get(HOUR_OF_DAY); + int mm = cal.get(MINUTE); + int ss = cal.get(SECOND); + int dayInSec = dd * 86400 + HH * 3600 + mm * 60 + ss; + return dayInSec; + } + + @Override + public String getDisplayString(String[] children) { + return getStandardDisplayString(getFuncName(), children); + } + + @Override + protected String getFuncName() { + return "months_between"; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java (revision 1673556) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java (working copy) @@ -20,10 +20,14 @@ import java.util.AbstractList; import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -60,10 +64,42 @@ @SuppressWarnings("deprecation") public class WindowingTableFunction extends TableFunctionEvaluator { + public static final Log LOG =LogFactory.getLog(WindowingTableFunction.class.getName()); + static class WindowingFunctionInfoHelper { + private boolean supportsWindow; + WindowingFunctionInfoHelper() { + } + + public WindowingFunctionInfoHelper(boolean supportsWindow) { + this.supportsWindow = supportsWindow; + } + + public boolean isSupportsWindow() { + return supportsWindow; + } + public void setSupportsWindow(boolean supportsWindow) { + this.supportsWindow = supportsWindow; + } + } + StreamingState streamingState; RankLimit rnkLimitDef; + + // There is some information about the windowing functions that needs to be initialized + // during query compilation time, and made available to during the map/reduce tasks via + // plan serialization. + Map windowingFunctionHelpers = null; + public Map getWindowingFunctionHelpers() { + return windowingFunctionHelpers; + } + + public void setWindowingFunctionHelpers( + Map windowingFunctionHelpers) { + this.windowingFunctionHelpers = windowingFunctionHelpers; + } + @SuppressWarnings({ "unchecked", "rawtypes" }) @Override public void execute(PTFPartitionIterator pItr, PTFPartition outP) throws HiveException { @@ -147,9 +183,8 @@ private boolean streamingPossible(Configuration cfg, WindowFunctionDef wFnDef) throws HiveException { WindowFrameDef wdwFrame = wFnDef.getWindowFrame(); - WindowFunctionInfo wFnInfo = FunctionRegistry.getWindowFunctionInfo(wFnDef - .getName()); + WindowingFunctionInfoHelper wFnInfo = getWindowingFunctionInfoHelper(wFnDef.getName()); if (!wFnInfo.isSupportsWindow()) { return true; } @@ -259,7 +294,46 @@ return new int[] {precedingSpan, followingSpan}; } + private void initializeWindowingFunctionInfoHelpers() throws SemanticException { + // getWindowFunctionInfo() cannot be called during map/reduce tasks. So cache necessary + // values during query compilation, and rely on plan serialization to bring this info + // to the object during the map/reduce tasks. 
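The null check below makes the capture idempotent: FunctionRegistry.getWindowFunctionInfo() is consulted only while the query is compiled, and the cached supportsWindow flags reach the tasks through serialization of the WindowingTableFunction. A minimal sketch of that capture-at-compile-time, read-at-run-time pattern, assuming illustrative names (CompileTimeCapture and FnFlags are not Hive classes):

    import java.io.Serializable;
    import java.util.HashMap;
    import java.util.Map;

    public class CompileTimeCapture {
      // Small serializable holder for flags that can only be looked up at compile time.
      static class FnFlags implements Serializable {
        final boolean supportsWindow;
        FnFlags(boolean supportsWindow) { this.supportsWindow = supportsWindow; }
      }

      // Populated on the driver during planning and shipped with the serialized plan.
      private final Map<String, FnFlags> helpers = new HashMap<String, FnFlags>();

      void captureAtCompileTime(String fnName, boolean supportsWindow) {
        helpers.put(fnName, new FnFlags(supportsWindow));
      }

      // Task side: only the cached flag is read; a missing entry means the plan was built wrong.
      boolean supportsWindow(String fnName) {
        FnFlags f = helpers.get(fnName);
        if (f == null) {
          throw new IllegalStateException("No cached flags for " + fnName);
        }
        return f.supportsWindow;
      }

      public static void main(String[] args) {
        CompileTimeCapture plan = new CompileTimeCapture();
        plan.captureAtCompileTime("rank", false); // e.g. a ranking function without window-frame support
        System.out.println(plan.supportsWindow("rank")); // false
      }
    }
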
+ if (windowingFunctionHelpers != null) { + return; + } + + windowingFunctionHelpers = new HashMap(); + WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef(); + for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) { + WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i); + GenericUDAFEvaluator fnEval = wFn.getWFnEval(); + WindowFunctionInfo wFnInfo = FunctionRegistry.getWindowFunctionInfo(wFn.getName()); + boolean supportsWindow = wFnInfo.isSupportsWindow(); + windowingFunctionHelpers.put(wFn.getName(), new WindowingFunctionInfoHelper(supportsWindow)); + } + } + @Override + protected void setOutputOI(StructObjectInspector outputOI) { + super.setOutputOI(outputOI); + // Call here because at this point the WindowTableFunctionDef has been set + try { + initializeWindowingFunctionInfoHelpers(); + } catch (SemanticException err) { + throw new RuntimeException("Unexpected error while setting up windowing function", err); + } + } + + private WindowingFunctionInfoHelper getWindowingFunctionInfoHelper(String fnName) { + WindowingFunctionInfoHelper wFnInfoHelper = windowingFunctionHelpers.get(fnName); + if (wFnInfoHelper == null) { + // Should not happen + throw new RuntimeException("No cached WindowingFunctionInfoHelper for " + fnName); + } + return wFnInfoHelper; + } + + @Override public void initializeStreaming(Configuration cfg, StructObjectInspector inputOI, boolean isMapSide) throws HiveException { @@ -412,8 +486,7 @@ if (fnEval instanceof ISupportStreamingModeForWindowing) { fnEval.terminate(streamingState.aggBuffers[i]); - WindowFunctionInfo wFnInfo = FunctionRegistry.getWindowFunctionInfo(wFn - .getName()); + WindowingFunctionInfoHelper wFnInfo = getWindowingFunctionInfoHelper(wFn.getName()); if (!wFnInfo.isSupportsWindow()) { numRowsRemaining = ((ISupportStreamingModeForWindowing) fnEval) .getRowsRemainingAfterTerminate(); Index: ql/src/test/org/apache/hadoop/hive/ql/TestErrorMsg.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/TestErrorMsg.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/TestErrorMsg.java (working copy) @@ -23,9 +23,11 @@ import junit.framework.Assert; import junit.framework.TestCase; +import org.junit.Test; -public class TestErrorMsg extends TestCase { +public class TestErrorMsg { + @Test public void testUniqueErrorCode() { Set numbers = new HashSet(); for (ErrorMsg err : ErrorMsg.values()) { @@ -33,4 +35,15 @@ Assert.assertTrue("duplicated error number " + code, numbers.add(code)); } } + @Test + public void testReverseMatch() { + testReverseMatch(ErrorMsg.OP_NOT_ALLOWED_IN_AUTOCOMMIT, "COMMIT"); + testReverseMatch(ErrorMsg.OP_NOT_ALLOWED_IN_TXN, "ALTER TABLE", "1"); + testReverseMatch(ErrorMsg.OP_NOT_ALLOWED_WITHOUT_TXN, "ROLLBACK"); + } + private void testReverseMatch(ErrorMsg errorMsg, String... 
args) { + String parametrizedMsg = errorMsg.format(args); + ErrorMsg canonicalMsg = ErrorMsg.getErrorMsg(parametrizedMsg); + Assert.assertEquals("Didn't find expected msg", errorMsg.getErrorCode(), canonicalMsg.getErrorCode()); + } } Index: ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java (revision 0) +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java (working copy) @@ -0,0 +1,189 @@ +package org.apache.hadoop.hive.ql; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; +import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.orc.FileDump; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FilenameFilter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; + +/** + * TODO: this should be merged with TestTxnCommands once that is checked in + * specifically the tests; the supporting code here is just a clone of TestTxnCommands + */ +public class TestTxnCommands2 { + private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") + + File.separator + TestTxnCommands2.class.getCanonicalName() + + "-" + System.currentTimeMillis() + ).getPath().replaceAll("\\\\", "/"); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + //bucket count for test tables; set it to 1 for easier debugging + private static int BUCKET_COUNT = 2; + @Rule + public TestName testName = new TestName(); + private HiveConf hiveConf; + private Driver d; + private static enum Table { + ACIDTBL("acidTbl"), + NONACIDORCTBL("nonAcidOrcTbl"); + + private final String name; + @Override + public String toString() { + return name; + } + Table(String name) { + this.name = name; + } + } + + @Before + public void setUp() throws Exception { + hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + TxnDbUtil.setConfValues(hiveConf); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETING, true); + TxnDbUtil.prepDb(); + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); + } + if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { + throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR); + } + SessionState.start(new SessionState(hiveConf)); + d = new Driver(hiveConf); + dropTables(); + runStatementOnDriver("create table " + Table.ACIDTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); + runStatementOnDriver("create table " + Table.NONACIDORCTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc 
TBLPROPERTIES ('transactional'='false')"); + } + private void dropTables() throws Exception { + for(Table t : Table.values()) { + runStatementOnDriver("drop table if exists " + t); + } + } + @After + public void tearDown() throws Exception { + try { + if (d != null) { + // runStatementOnDriver("set autocommit true"); + dropTables(); + d.destroy(); + d.close(); + d = null; + TxnDbUtil.cleanDb(); + } + } finally { + FileUtils.deleteDirectory(new File(TEST_DATA_DIR)); + } + } + @Ignore("not needed but useful for testing") + @Test + public void testNonAcidInsert() throws Exception { + runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)"); + List rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); + runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(2,3)"); + List rs1 = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL); + } + @Test + public void testDeleteIn() throws Exception { + int[][] tableData = {{1,2},{3,2},{5,2},{1,3},{3,3},{5,3}}; + runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData)); + runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,7),(3,7)"); + //todo: once multistatement txns are supported, add a test to run next 2 statements in a single txn + runStatementOnDriver("delete from " + Table.ACIDTBL + " where a in(select a from " + Table.NONACIDORCTBL + ")"); + runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) select a,b from " + Table.NONACIDORCTBL); + List rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b"); + int[][] updatedData = {{1,7},{3,7},{5,2},{5,3}}; + Assert.assertEquals("Bulk update failed", stringifyValues(updatedData), rs); + runStatementOnDriver("update " + Table.ACIDTBL + " set b=19 where b in(select b from " + Table.NONACIDORCTBL + " where a = 3)"); + List rs2 = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b"); + int[][] updatedData2 = {{1,19},{3,19},{5,2},{5,3}}; + Assert.assertEquals("Bulk update2 failed", stringifyValues(updatedData2), rs2); + } + + /** + * takes raw data and turns it into a string as if from Driver.getResults() + * sorts rows in dictionary order + */ + private List stringifyValues(int[][] rowsIn) { + assert rowsIn.length > 0; + int[][] rows = rowsIn.clone(); + Arrays.sort(rows, new RowComp()); + List rs = new ArrayList(); + for(int[] row : rows) { + assert row.length > 0; + StringBuilder sb = new StringBuilder(); + for(int value : row) { + sb.append(value).append("\t"); + } + sb.setLength(sb.length() - 1); + rs.add(sb.toString()); + } + return rs; + } + private static final class RowComp implements Comparator { + public int compare(int[] row1, int[] row2) { + assert row1 != null && row2 != null && row1.length == row2.length; + for(int i = 0; i < row1.length; i++) { + int comp = Integer.compare(row1[i], row2[i]); + if(comp != 0) { + return comp; + } + } + return 0; + } + } + private String makeValuesClause(int[][] rows) { + assert rows.length > 0; + StringBuilder sb = new StringBuilder("values"); + for(int[] row : rows) { + assert row.length > 0; + if(row.length > 1) { + sb.append("("); + } + for(int value : row) { + sb.append(value).append(","); + } + sb.setLength(sb.length() - 1);//remove trailing comma + if(row.length > 1) { + sb.append(")"); + } + sb.append(","); + } + sb.setLength(sb.length() - 1);//remove trailing comma + return sb.toString(); + } + + private List runStatementOnDriver(String stmt) throws Exception { + 
CommandProcessorResponse cpr = d.run(stmt); + if(cpr.getResponseCode() != 0) { + throw new RuntimeException(stmt + " failed: " + cpr); + } + List rs = new ArrayList(); + d.getResults(rs); + return rs; + } +} Index: ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java (working copy) @@ -50,10 +50,10 @@ BytesBytesMultiHashMap map = new BytesBytesMultiHashMap(CAPACITY, LOAD_FACTOR, WB_SIZE); RandomKvSource kv = new RandomKvSource(0, 0); map.put(kv, -1); - verifyResults(map, kv.getLastKey(), kv.getLastValue()); + verifyHashMapResult(map, kv.getLastKey(), kv.getLastValue()); kv = new RandomKvSource(10, 100); map.put(kv, -1); - verifyResults(map, kv.getLastKey(), kv.getLastValue()); + verifyHashMapResult(map, kv.getLastKey(), kv.getLastValue()); } @Test @@ -61,12 +61,12 @@ BytesBytesMultiHashMap map = new BytesBytesMultiHashMap(CAPACITY, LOAD_FACTOR, WB_SIZE); RandomKvSource kv = new RandomKvSource(0, 100); map.put(kv, -1); - verifyResults(map, kv.getLastKey(), kv.getLastValue()); + verifyHashMapResult(map, kv.getLastKey(), kv.getLastValue()); FixedKeyKvSource kv2 = new FixedKeyKvSource(kv.getLastKey(), 0, 100); kv2.values.add(kv.getLastValue()); for (int i = 0; i < 3; ++i) { map.put(kv2, -1); - verifyResults(map, kv2.key, kv2.values.toArray(new byte[kv2.values.size()][])); + verifyHashMapResult(map, kv2.key, kv2.values.toArray(new byte[kv2.values.size()][])); } } @@ -80,11 +80,11 @@ FixedKeyKvSource kv2 = new FixedKeyKvSource(kv.getLastKey(), 0, 100); map.put(kv2, -1); key[0] = (byte)(key[0] + 1); - List results = new ArrayList(0); - map.getValueRefs(key, key.length, results); - assertTrue(results.isEmpty()); - map.getValueRefs(key, 0, results); - assertTrue(results.isEmpty()); + BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); + map.getValueResult(key, 0, key.length, hashMapResult); + assertTrue(!hashMapResult.hasRows()); + map.getValueResult(key, 0, 0, hashMapResult); + assertTrue(!hashMapResult.hasRows()); } @Test @@ -96,13 +96,12 @@ map.put(kv, -1); } for (int i = 0; i < kv.keys.size(); ++i) { - verifyResults(map, kv.keys.get(i), kv.values.get(i)); + verifyHashMapResult(map, kv.keys.get(i), kv.values.get(i)); } assertEquals(CAPACITY, map.getCapacity()); // Get of non-existent key should terminate.. - List results = new ArrayList(0); - map.getValueRefs(new byte[0], 0, results); - assertTrue(results.isEmpty()); + BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); + map.getValueResult(new byte[0], 0, 0, hashMapResult); } @Test @@ -113,23 +112,29 @@ for (int i = 0; i < 18; ++i) { map.put(kv, -1); for (int j = 0; j <= i; ++j) { - verifyResults(map, kv.keys.get(j), kv.values.get(j)); + verifyHashMapResult(map, kv.keys.get(j), kv.values.get(j)); } } assertEquals(1 << 18, map.getCapacity()); } - private void verifyResults(BytesBytesMultiHashMap map, byte[] key, byte[]... values) { - List results = new ArrayList(0); - byte state = map.getValueRefs(key, key.length, results); - assertEquals(state, results.size()); - assertEquals(values.length, results.size()); + private void verifyHashMapResult(BytesBytesMultiHashMap map, byte[] key, byte[]... 
values) { + BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); + byte state = map.getValueResult(key, 0, key.length, hashMapResult); HashSet hs = new HashSet(); - for (int i = 0; i < results.size(); ++i) { - WriteBuffers.ByteSegmentRef result = results.get(i); - map.populateValue(result); - hs.add(result.copy()); + int count = 0; + if (hashMapResult.hasRows()) { + WriteBuffers.ByteSegmentRef ref = hashMapResult.first(); + while (ref != null) { + count++; + hs.add(ref.copy()); + ref = hashMapResult.next(); + } + } else { + assertTrue(hashMapResult.isEof()); } + assertEquals(state, count); + assertEquals(values.length, count); for (int i = 0; i < values.length; ++i) { assertTrue(hs.contains(ByteBuffer.wrap(values[i]))); } Index: ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java (revision 0) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java (working copy) @@ -0,0 +1,405 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hive.common.util.DateUtils; + +/** + * Generate object inspector and random row object[]. 
+ */ +public class RandomRowObjectSource { + + private Random r; + + private int columnCount; + + private List typeNames; + + private PrimitiveCategory[] primitiveCategories; + + private PrimitiveTypeInfo[] primitiveTypeInfos; + + private List primitiveObjectInspectorList; + + private StructObjectInspector rowStructObjectInspector; + + public List typeNames() { + return typeNames; + } + + public PrimitiveCategory[] primitiveCategories() { + return primitiveCategories; + } + + public PrimitiveTypeInfo[] primitiveTypeInfos() { + return primitiveTypeInfos; + } + + public StructObjectInspector rowStructObjectInspector() { + return rowStructObjectInspector; + } + + public void init(Random r) { + this.r = r; + chooseSchema(); + } + + private static String[] possibleHiveTypeNames = { + "boolean", + "tinyint", + "smallint", + "int", + "bigint", + "date", + "float", + "double", + "string", + "char", + "varchar", + "binary", + "date", + "timestamp", + serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME, + serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME, + "decimal" + }; + + private void chooseSchema() { + columnCount = 1 + r.nextInt(20); + typeNames = new ArrayList(columnCount); + primitiveCategories = new PrimitiveCategory[columnCount]; + primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + primitiveObjectInspectorList = new ArrayList(columnCount); + List columnNames = new ArrayList(columnCount); + for (int c = 0; c < columnCount; c++) { + columnNames.add(String.format("col%d", c)); + int typeNum = r.nextInt(possibleHiveTypeNames.length); + String typeName = possibleHiveTypeNames[typeNum]; + if (typeName.equals("char")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("char(%d)", maxLength); + } else if (typeName.equals("varchar")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("varchar(%d)", maxLength); + } else if (typeName.equals("decimal")) { + typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); + } + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + primitiveTypeInfos[c] = primitiveTypeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + primitiveCategories[c] = primitiveCategory; + primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo)); + typeNames.add(typeName); + } + rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList); + } + + public Object[][] randomRows(int n) { + Object[][] result = new Object[n][]; + for (int i = 0; i < n; i++) { + result[i] = randomRow(); + } + return result; + } + + public Object[] randomRow() { + Object row[] = new Object[columnCount]; + for (int c = 0; c < columnCount; c++) { + Object object = randomObject(c); + if (object == null) { + throw new Error("Unexpected null for column " + c); + } + row[c] = getWritableObject(c, object); + if (row[c] == null) { + throw new Error("Unexpected null for writable for column " + c); + } + } + return row; + } + + public Object getWritableObject(int column, Object object) { + ObjectInspector objectInspector = primitiveObjectInspectorList.get(column); + PrimitiveCategory primitiveCategory = primitiveCategories[column]; + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; + switch (primitiveCategory) { + case BOOLEAN: + return ((WritableBooleanObjectInspector) 
objectInspector).create((boolean) object); + case BYTE: + return ((WritableByteObjectInspector) objectInspector).create((byte) object); + case SHORT: + return ((WritableShortObjectInspector) objectInspector).create((short) object); + case INT: + return ((WritableIntObjectInspector) objectInspector).create((int) object); + case LONG: + return ((WritableLongObjectInspector) objectInspector).create((long) object); + case DATE: + return ((WritableDateObjectInspector) objectInspector).create((Date) object); + case FLOAT: + return ((WritableFloatObjectInspector) objectInspector).create((float) object); + case DOUBLE: + return ((WritableDoubleObjectInspector) objectInspector).create((double) object); + case STRING: + return ((WritableStringObjectInspector) objectInspector).create((String) object); + case CHAR: + { + WritableHiveCharObjectInspector writableCharObjectInspector = + new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo); + return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); + } + case VARCHAR: + { + WritableHiveVarcharObjectInspector writableVarcharObjectInspector = + new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo); + return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); + } + case BINARY: + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY); + case TIMESTAMP: + return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0)); + case INTERVAL_YEAR_MONTH: + return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0)); + case INTERVAL_DAY_TIME: + return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0)); + case DECIMAL: + { + WritableHiveDecimalObjectInspector writableDecimalObjectInspector = + new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo); + return writableDecimalObjectInspector.create(HiveDecimal.ZERO); + } + default: + throw new Error("Unknown primitive category " + primitiveCategory); + } + } + + public Object randomObject(int column) { + PrimitiveCategory primitiveCategory = primitiveCategories[column]; + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; + switch (primitiveCategory) { + case BOOLEAN: + // generate true and false with equal probability + return Boolean.valueOf(r.nextInt(2) == 1); + case BYTE: + return Byte.valueOf((byte) r.nextInt()); + case SHORT: + return Short.valueOf((short) r.nextInt()); + case INT: + return Integer.valueOf(r.nextInt()); + case LONG: + return Long.valueOf(r.nextLong()); + case DATE: + return getRandDate(r); + case FLOAT: + return Float.valueOf(r.nextFloat() * 10 - 5); + case DOUBLE: + return Double.valueOf(r.nextDouble() * 10 - 5); + case STRING: + return getRandString(r); + case CHAR: + return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo); + case VARCHAR: + return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo); + case BINARY: + return getRandBinary(r, 1 + r.nextInt(100)); + case TIMESTAMP: + return getRandTimestamp(r); + case INTERVAL_YEAR_MONTH: + return getRandIntervalYearMonth(r); + case INTERVAL_DAY_TIME: + return getRandIntervalDayTime(r); + case DECIMAL: + return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo); + default: + throw new Error("Unknown primitive category " + primitiveCategory); + } + } + + public static String getRandString(Random r) { + return getRandString(r, null, r.nextInt(10)); + } + + public static String
getRandString(Random r, String characters, int length) { + if (characters == null) { + characters = "ABCDEFGHIJKLMabcdefghijklm"; + } + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < length; i++) { + sb.append(characters.charAt(r.nextInt(characters.length()))); + } + return sb.toString(); + } + + public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) { + int maxLength = 1 + r.nextInt(charTypeInfo.getLength()); + String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveChar hiveChar = new HiveChar(randomString, maxLength); + return hiveChar; + } + + public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) { + int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength()); + String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength); + return hiveVarchar; + } + + public static byte[] getRandBinary(Random r, int len){ + byte[] bytes = new byte[len]; + for (int j = 0; j < len; j++){ + bytes[j] = Byte.valueOf((byte) r.nextInt()); + } + return bytes; + } + + private static final String DECIMAL_CHARS = "0123456789"; + + public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) { + while (true) { + StringBuilder sb = new StringBuilder(); + int precision = 1 + r.nextInt(18); + int scale = 0 + r.nextInt(precision + 1); + + int integerDigits = precision - scale; + + if (r.nextBoolean()) { + sb.append("-"); + } + + if (integerDigits == 0) { + sb.append("0"); + } else { + sb.append(getRandString(r, DECIMAL_CHARS, integerDigits)); + } + if (scale != 0) { + sb.append("."); + sb.append(getRandString(r, DECIMAL_CHARS, scale)); + } + + HiveDecimal bd = HiveDecimal.create(sb.toString()); + if (bd.scale() > bd.precision()) { + // Values such as 0.001 parse with scale > precision; regenerate until scale <= precision. + continue; + } + + return bd; + } + } + + public static Date getRandDate(Random r) { + String dateStr = String.format("%d-%02d-%02d", + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(1 + r.nextInt(12)), // month + Integer.valueOf(1 + r.nextInt(28))); // day + Date dateVal = Date.valueOf(dateStr); + return dateVal; + } + + public static Timestamp getRandTimestamp(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); + } + String timestampStr = String.format("%d-%02d-%02d %02d:%02d:%02d%s", + Integer.valueOf(1970 + r.nextInt(200)), // year + Integer.valueOf(1 + r.nextInt(12)), // month + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + Timestamp timestampVal = Timestamp.valueOf(timestampStr); + return timestampVal; + } + + public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) { + String yearMonthSignStr = r.nextInt(2) == 0 ?
"" : "-"; + String intervalYearMonthStr = String.format("%s%d-%d", + yearMonthSignStr, + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(0 + r.nextInt(12))); // month + HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr); + TestCase.assertTrue(intervalYearMonthVal != null); + return intervalYearMonthVal; + } + + public static HiveIntervalDayTime getRandIntervalDayTime(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); + } + String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-"; + String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s", + yearMonthSignStr, + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr); + TestCase.assertTrue(intervalDayTimeVal != null); + return intervalDayTimeVal; + } +} Index: ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java (working copy) @@ -88,7 +88,7 @@ ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false); List columns = new ArrayList(); columns.add("col1"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); FilterDesc fdesc = new FilterDesc(); fdesc.setPredicate(col1Expr); return new VectorFilterOperator(vc, fdesc); Index: ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java (working copy) @@ -173,7 +173,7 @@ List mapColumnNames = new ArrayList(); mapColumnNames.add("Key"); mapColumnNames.add("Value"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildKeyGroupByDesc (ctx, "max", "Value", TypeInfoFactory.longTypeInfo, @@ -1710,7 +1710,7 @@ mapColumnNames.put("value", i); outputColumnNames.add("value"); - VectorizationContext ctx = new VectorizationContext(outputColumnNames); + VectorizationContext ctx = new VectorizationContext("name", outputColumnNames); ArrayList aggs = new ArrayList(1); aggs.add( @@ -1821,7 +1821,7 @@ List mapColumnNames = new ArrayList(); mapColumnNames.add("Key"); mapColumnNames.add("Value"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); Set keys = new HashSet(); AggregationDesc agg = buildAggregationDesc(ctx, aggregateName, @@ -2235,7 +2235,7 @@ Object expected) throws HiveException { List mapColumnNames = new ArrayList(); mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = 
buildGroupByDescCountStar (ctx); @@ -2264,7 +2264,7 @@ Object expected) throws HiveException { List mapColumnNames = new ArrayList(); mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType(ctx, "count", "A", TypeInfoFactory.longTypeInfo); VectorGroupByDesc vectorDesc = desc.getVectorDesc(); @@ -2296,7 +2296,7 @@ Object expected) throws HiveException { List mapColumnNames = new ArrayList(); mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", TypeInfoFactory.stringTypeInfo); @@ -2322,11 +2322,12 @@ } public void testAggregateDecimalIterable ( -String aggregateName, Iterable data, - Object expected) throws HiveException { - List mapColumnNames = new ArrayList(); - mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + String aggregateName, + Iterable data, + Object expected) throws HiveException { + List mapColumnNames = new ArrayList(); + mapColumnNames.add("A"); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4)); @@ -2358,7 +2359,7 @@ Object expected) throws HiveException { List mapColumnNames = new ArrayList(); mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType (ctx, aggregateName, "A", TypeInfoFactory.doubleTypeInfo); @@ -2389,7 +2390,7 @@ Object expected) throws HiveException { List mapColumnNames = new ArrayList(); mapColumnNames.add("A"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", TypeInfoFactory.longTypeInfo); @@ -2420,7 +2421,7 @@ List mapColumnNames = new ArrayList(); mapColumnNames.add("Key"); mapColumnNames.add("Value"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); Set keys = new HashSet(); @@ -2487,7 +2488,7 @@ List mapColumnNames = new ArrayList(); mapColumnNames.add("Key"); mapColumnNames.add("Value"); - VectorizationContext ctx = new VectorizationContext(mapColumnNames); + VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); Set keys = new HashSet(); GroupByDesc desc = buildKeyGroupByDesc (ctx, aggregateName, "Value", Index: ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java (revision 0) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java (working copy) @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.HashMap; +import java.util.Map; +import java.util.Random; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; + +import junit.framework.TestCase; + +/** + * Unit test for the vectorized conversion to and from row object[]. + */ +public class TestVectorRowObject extends TestCase { + + void examineBatch(VectorizedRowBatch batch, VectorExtractRowSameBatch vectorExtractRow, + Object[][] randomRows, int firstRandomRowIndex ) { + + int rowSize = vectorExtractRow.getCount(); + Object[] row = new Object[rowSize]; + for (int i = 0; i < batch.size; i++) { + vectorExtractRow.extractRow(i, row); + Object[] expectedRow = randomRows[firstRandomRowIndex + i]; + for (int c = 0; c < rowSize; c++) { + if (!row[c].equals(expectedRow[c])) { + fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch"); + } + } + } + } + + void testVectorRowObject(int caseNum, Random r) throws HiveException { + + Map emptyScratchMap = new HashMap(); + + RandomRowObjectSource source = new RandomRowObjectSource(); + source.init(r); + + VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); + batchContext.init(emptyScratchMap, source.rowStructObjectInspector()); + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorAssignRowSameBatch vectorAssignRow = new VectorAssignRowSameBatch(); + vectorAssignRow.init(source.typeNames()); + vectorAssignRow.setOneBatch(batch); + + VectorExtractRowSameBatch vectorExtractRow = new VectorExtractRowSameBatch(); + vectorExtractRow.init(source.typeNames()); + vectorExtractRow.setOneBatch(batch); + + Object[][] randomRows = source.randomRows(100000); + int firstRandomRowIndex = 0; + for (int i = 0; i < randomRows.length; i++) { + Object[] row = randomRows[i]; + + vectorAssignRow.assignRow(batch.size, row); + batch.size++; + if (batch.size == batch.DEFAULT_SIZE) { + examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex); + firstRandomRowIndex = i + 1; + batch.reset(); + } + } + if (batch.size > 0) { + examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex); + } + } + + public void testVectorRowObject() throws Throwable { + + try { + Random r = new Random(5678); + for (int c = 0; c < 10; c++) { + testVectorRowObject(c, r); + } + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +} \ No newline at end of file Index: ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java (working copy) @@ -88,7 +88,7 @@ columns.add("a"); columns.add("b"); columns.add("c"); - VectorizationContext vc = new 
VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); SelectDesc selDesc = new SelectDesc(false); List colList = new ArrayList(); Index: ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java (revision 0) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java (working copy) @@ -0,0 +1,658 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; +import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; 
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; + +import junit.framework.TestCase; + +/** + * Unit test for the vectorized serialize and deserialize row. + */ +public class TestVectorSerDeRow extends TestCase { + + public static enum SerializationType { + NONE, + BINARY_SORTABLE, + LAZY_BINARY, + LAZY_SIMPLE + } + + void deserializeAndVerify(Output output, DeserializeRead deserializeRead, + RandomRowObjectSource source, Object[] expectedRow) + throws HiveException, IOException { + deserializeRead.set(output.getData(), 0, output.getLength()); + PrimitiveCategory[] primitiveCategories = source.primitiveCategories(); + for (int i = 0; i < primitiveCategories.length; i++) { + Object expected = expectedRow[i]; + PrimitiveCategory primitiveCategory = primitiveCategories[i]; + PrimitiveTypeInfo primitiveTypeInfo = source.primitiveTypeInfos()[i]; + if (deserializeRead.readCheckNull()) { + throw new HiveException("Unexpected NULL"); + } + switch (primitiveCategory) { + case BOOLEAN: + { + Boolean value = deserializeRead.readBoolean(); + BooleanWritable expectedWritable = (BooleanWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case BYTE: + { + Byte value = deserializeRead.readByte(); + ByteWritable expectedWritable = (ByteWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")"); + } + } + break; + case SHORT: + { + Short value = deserializeRead.readShort(); + ShortWritable expectedWritable = (ShortWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case INT: + { + Integer value = deserializeRead.readInt(); + IntWritable expectedWritable = (IntWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Int field mismatch (expected " + expected + " found " + 
value + ")"); + } + } + break; + case LONG: + { + Long value = deserializeRead.readLong(); + LongWritable expectedWritable = (LongWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case DATE: + { + DeserializeRead.ReadDateResults readDateResults = deserializeRead.createReadDateResults(); + deserializeRead.readDate(readDateResults); + Date value = readDateResults.getDate(); + DateWritable expectedWritable = (DateWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); + } + } + break; + case FLOAT: + { + Float value = deserializeRead.readFloat(); + FloatWritable expectedWritable = (FloatWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case DOUBLE: + { + Double value = deserializeRead.readDouble(); + DoubleWritable expectedWritable = (DoubleWritable) expected; + if (!value.equals(expectedWritable.get())) { + TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case STRING: + { + DeserializeRead.ReadStringResults readStringResults = deserializeRead.createReadStringResults(); + deserializeRead.readString(readStringResults); + + char[] charsBuffer = new char[readStringResults.bytes.length]; + for (int c = 0; c < charsBuffer.length; c++) { + charsBuffer[c] = (char) (readStringResults.bytes[c] & 0xFF); + } + + byte[] stringBytes = Arrays.copyOfRange(readStringResults.bytes, readStringResults.start, readStringResults.start + readStringResults.length); + + char[] charsRange = new char[stringBytes.length]; + for (int c = 0; c < charsRange.length; c++) { + charsRange[c] = (char) (stringBytes[c] & 0xFF); + } + + Text text = new Text(stringBytes); + String value = text.toString(); + Text expectedWritable = (Text) expected; + if (!value.equals(expectedWritable.toString())) { + TestCase.fail("String field mismatch (expected '" + expectedWritable.toString() + "' found '" + value + "')"); + } + } + break; + case CHAR: + { + DeserializeRead.ReadHiveCharResults readHiveCharResults = deserializeRead.createReadHiveCharResults(); + deserializeRead.readHiveChar(readHiveCharResults); + HiveChar hiveChar = readHiveCharResults.getHiveChar(); + HiveCharWritable expectedWritable = (HiveCharWritable) expected; + if (!hiveChar.equals(expectedWritable.getHiveChar())) { + TestCase.fail("Char field mismatch (expected '" + expectedWritable.getHiveChar() + "' found '" + hiveChar + "')"); + } + } + break; + case VARCHAR: + { + DeserializeRead.ReadHiveVarcharResults readHiveVarcharResults = deserializeRead.createReadHiveVarcharResults(); + deserializeRead.readHiveVarchar(readHiveVarcharResults); + HiveVarchar hiveVarchar = readHiveVarcharResults.getHiveVarchar(); + HiveVarcharWritable expectedWritable = (HiveVarcharWritable) expected; + if (!hiveVarchar.equals(expectedWritable.getHiveVarchar())) { + TestCase.fail("Varchar field mismatch (expected '" + expectedWritable.getHiveVarchar() + "' found '" + hiveVarchar + "')"); + } + } + break; + case DECIMAL: + { + DeserializeRead.ReadDecimalResults readDecimalResults = deserializeRead.createReadDecimalResults(); + deserializeRead.readHiveDecimal(readDecimalResults); + HiveDecimal value = readDecimalResults.getHiveDecimal(); + if (value == null) { + 
TestCase.fail("Decimal field evaluated to NULL"); + } + HiveDecimalWritable expectedWritable = (HiveDecimalWritable) expected; + if (!value.equals(expectedWritable.getHiveDecimal())) { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; + int precision = decimalTypeInfo.getPrecision(); + int scale = decimalTypeInfo.getScale(); + TestCase.fail("Decimal field mismatch (expected " + expectedWritable.getHiveDecimal() + " found " + value.toString() + ") precision " + precision + ", scale " + scale); + } + } + break; + case TIMESTAMP: + { + DeserializeRead.ReadTimestampResults readTimestampResults = deserializeRead.createReadTimestampResults(); + deserializeRead.readTimestamp(readTimestampResults); + Timestamp value = readTimestampResults.getTimestamp(); + TimestampWritable expectedWritable = (TimestampWritable) expected; + if (!value.equals(expectedWritable.getTimestamp())) { + TestCase.fail("Timestamp field mismatch (expected " + expectedWritable.getTimestamp() + " found " + value.toString() + ")"); + } + } + break; + case INTERVAL_YEAR_MONTH: + { + DeserializeRead.ReadIntervalYearMonthResults readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); + deserializeRead.readIntervalYearMonth(readIntervalYearMonthResults); + HiveIntervalYearMonth value = readIntervalYearMonthResults.getHiveIntervalYearMonth(); + HiveIntervalYearMonthWritable expectedWritable = (HiveIntervalYearMonthWritable) expected; + HiveIntervalYearMonth expectedValue = expectedWritable.getHiveIntervalYearMonth(); + if (!value.equals(expectedValue)) { + TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expectedValue + " found " + value.toString() + ")"); + } + } + break; + case INTERVAL_DAY_TIME: + { + DeserializeRead.ReadIntervalDayTimeResults readIntervalDayTimeResults = deserializeRead.createReadIntervalDayTimeResults(); + deserializeRead.readIntervalDayTime(readIntervalDayTimeResults); + HiveIntervalDayTime value = readIntervalDayTimeResults.getHiveIntervalDayTime(); + HiveIntervalDayTimeWritable expectedWritable = (HiveIntervalDayTimeWritable) expected; + HiveIntervalDayTime expectedValue = expectedWritable.getHiveIntervalDayTime(); + if (!value.equals(expectedValue)) { + TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expectedValue + " found " + value.toString() + ")"); + } + } + break; + case BINARY: + { + DeserializeRead.ReadBinaryResults readBinaryResults = deserializeRead.createReadBinaryResults(); + deserializeRead.readBinary(readBinaryResults); + byte[] byteArray = Arrays.copyOfRange(readBinaryResults.bytes, readBinaryResults.start, readBinaryResults.start + readBinaryResults.length); + BytesWritable expectedWritable = (BytesWritable) expected; + if (byteArray.length != expectedWritable.getLength()){ + TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + byteArray + ")"); + } + byte[] expectedBytes = expectedWritable.getBytes(); + for (int b = 0; b < byteArray.length; b++) { + if (byteArray[b] != expectedBytes[b]) { + TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + byteArray + ")"); + } + } + } + break; + default: + throw new HiveException("Unexpected primitive category " + primitiveCategory); + } + } + deserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!deserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!deserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!deserializeRead.bufferRangeHasExtraDataWarned()); + } + + void 
serializeBatch(VectorizedRowBatch batch, VectorSerializeRow vectorSerializeRow, + DeserializeRead deserializeRead, RandomRowObjectSource source, Object[][] randomRows, + int firstRandomRowIndex) throws HiveException, IOException { + + Output output = new Output(); + for (int i = 0; i < batch.size; i++) { + output.reset(); + vectorSerializeRow.setOutput(output); + vectorSerializeRow.serializeWrite(batch, i); + Object[] expectedRow = randomRows[firstRandomRowIndex + i]; + + byte[] bytes = output.getData(); + int length = output.getLength(); + char[] chars = new char[length]; + for (int c = 0; c < chars.length; c++) { + chars[c] = (char) (bytes[c] & 0xFF); + } + + deserializeAndVerify(output, deserializeRead, source, expectedRow); + } + } + + void testVectorSerializeRow(int caseNum, Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException { + + Map emptyScratchMap = new HashMap(); + + RandomRowObjectSource source = new RandomRowObjectSource(); + source.init(r); + + VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); + batchContext.init(emptyScratchMap, source.rowStructObjectInspector()); + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorAssignRowSameBatch vectorAssignRow = new VectorAssignRowSameBatch(); + vectorAssignRow.init(source.typeNames()); + vectorAssignRow.setOneBatch(batch); + + int fieldCount = source.typeNames().size(); + DeserializeRead deserializeRead; + SerializeWrite serializeWrite; + switch (serializationType) { + case BINARY_SORTABLE: + deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos()); + serializeWrite = new BinarySortableSerializeWrite(fieldCount); + break; + case LAZY_BINARY: + deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos()); + serializeWrite = new LazyBinarySerializeWrite(fieldCount); + break; + case LAZY_SIMPLE: + { + StructObjectInspector rowObjectInspector = source.rowStructObjectInspector(); + LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector); + byte separator = (byte) '\t'; + deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), + separator, lazySerDeParams); + serializeWrite = new LazySimpleSerializeWrite(fieldCount, + separator, lazySerDeParams); + } + break; + default: + throw new Error("Unknown serialization type " + serializationType); + } + VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite); + vectorSerializeRow.init(source.typeNames()); + + Object[][] randomRows = source.randomRows(100000); + int firstRandomRowIndex = 0; + for (int i = 0; i < randomRows.length; i++) { + Object[] row = randomRows[i]; + + vectorAssignRow.assignRow(batch.size, row); + batch.size++; + if (batch.size == batch.DEFAULT_SIZE) { + serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex); + firstRandomRowIndex = i + 1; + batch.reset(); + } + } + if (batch.size > 0) { + serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex); + } + } + + void examineBatch(VectorizedRowBatch batch, VectorExtractRowSameBatch vectorExtractRow, + Object[][] randomRows, int firstRandomRowIndex ) { + + int rowSize = vectorExtractRow.getCount(); + Object[] row = new Object[rowSize]; + for (int i = 0; i < batch.size; i++) { + vectorExtractRow.extractRow(i, row); + + Object[] expectedRow = randomRows[firstRandomRowIndex + i]; + + for (int c = 0; c < rowSize; c++) { + if (row[c] == null) { + fail("Unexpected NULL 
from extractRow"); + } + if (!row[c].equals(expectedRow[c])) { + fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch"); + } + } + } + } + + private Output serializeRow(Object[] row, RandomRowObjectSource source, SerializeWrite serializeWrite) throws HiveException, IOException { + Output output = new Output(); + serializeWrite.set(output); + PrimitiveCategory[] primitiveCategories = source.primitiveCategories(); + for (int i = 0; i < primitiveCategories.length; i++) { + Object object = row[i]; + PrimitiveCategory primitiveCategory = primitiveCategories[i]; + switch (primitiveCategory) { + case BOOLEAN: + { + BooleanWritable expectedWritable = (BooleanWritable) object; + boolean value = expectedWritable.get(); + serializeWrite.writeBoolean(value); + } + break; + case BYTE: + { + ByteWritable expectedWritable = (ByteWritable) object; + byte value = expectedWritable.get(); + serializeWrite.writeByte(value); + } + break; + case SHORT: + { + ShortWritable expectedWritable = (ShortWritable) object; + short value = expectedWritable.get(); + serializeWrite.writeShort(value); + } + break; + case INT: + { + IntWritable expectedWritable = (IntWritable) object; + int value = expectedWritable.get(); + serializeWrite.writeInt(value); + } + break; + case LONG: + { + LongWritable expectedWritable = (LongWritable) object; + long value = expectedWritable.get(); + serializeWrite.writeLong(value); + } + break; + case DATE: + { + DateWritable expectedWritable = (DateWritable) object; + Date value = expectedWritable.get(); + serializeWrite.writeDate(value); + } + break; + case FLOAT: + { + FloatWritable expectedWritable = (FloatWritable) object; + float value = expectedWritable.get(); + serializeWrite.writeFloat(value); + } + break; + case DOUBLE: + { + DoubleWritable expectedWritable = (DoubleWritable) object; + double value = expectedWritable.get(); + serializeWrite.writeDouble(value); + } + break; + case STRING: + { + Text text = (Text) object; + serializeWrite.writeString(text.getBytes(), 0, text.getLength()); + } + break; + case CHAR: + { + HiveCharWritable expectedWritable = (HiveCharWritable) object; + HiveChar value = expectedWritable.getHiveChar(); + serializeWrite.writeHiveChar(value); + } + break; + case VARCHAR: + { + HiveVarcharWritable expectedWritable = (HiveVarcharWritable) object; + HiveVarchar value = expectedWritable.getHiveVarchar(); + serializeWrite.writeHiveVarchar(value); + } + break; + case BINARY: + { + BytesWritable expectedWritable = (BytesWritable) object; + byte[] bytes = expectedWritable.getBytes(); + int length = expectedWritable.getLength(); + serializeWrite.writeBinary(bytes, 0, length); + } + break; + case TIMESTAMP: + { + TimestampWritable expectedWritable = (TimestampWritable) object; + Timestamp value = expectedWritable.getTimestamp(); + serializeWrite.writeTimestamp(value); + } + break; + case INTERVAL_YEAR_MONTH: + { + HiveIntervalYearMonthWritable expectedWritable = (HiveIntervalYearMonthWritable) object; + HiveIntervalYearMonth value = expectedWritable.getHiveIntervalYearMonth(); + serializeWrite.writeHiveIntervalYearMonth(value); + } + break; + case INTERVAL_DAY_TIME: + { + HiveIntervalDayTimeWritable expectedWritable = (HiveIntervalDayTimeWritable) object; + HiveIntervalDayTime value = expectedWritable.getHiveIntervalDayTime(); + serializeWrite.writeHiveIntervalDayTime(value); + } + break; + case DECIMAL: + { + HiveDecimalWritable expectedWritable = (HiveDecimalWritable) object; + HiveDecimal value = expectedWritable.getHiveDecimal(); + 
serializeWrite.writeHiveDecimal(value); + } + break; + default: + throw new HiveException("Unexpected primitive category " + primitiveCategory); + } + } + return output; + } + + private Properties createProperties(String fieldNames, String fieldTypes) { + Properties tbl = new Properties(); + + // Set the configuration parameters + tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); + + tbl.setProperty("columns", fieldNames); + tbl.setProperty("columns.types", fieldTypes); + + tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); + + return tbl; + } + + private LazySerDeParameters getSerDeParams(StructObjectInspector rowObjectInspector) throws SerDeException { + String fieldNames = ObjectInspectorUtils.getFieldNames(rowObjectInspector); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowObjectInspector); + Configuration conf = new Configuration(); + Properties tbl = createProperties(fieldNames, fieldTypes); + return new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); + } + + void testVectorDeserializeRow(int caseNum, Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException { + + Map emptyScratchMap = new HashMap(); + + RandomRowObjectSource source = new RandomRowObjectSource(); + source.init(r); + + VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); + batchContext.init(emptyScratchMap, source.rowStructObjectInspector()); + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + int fieldCount = source.typeNames().size(); + DeserializeRead deserializeRead; + SerializeWrite serializeWrite; + switch (serializationType) { + case BINARY_SORTABLE: + deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos()); + serializeWrite = new BinarySortableSerializeWrite(fieldCount); + break; + case LAZY_BINARY: + deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos()); + serializeWrite = new LazyBinarySerializeWrite(fieldCount); + break; + case LAZY_SIMPLE: + { + StructObjectInspector rowObjectInspector = source.rowStructObjectInspector(); + LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector); + byte separator = (byte) '\t'; + deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), + separator, lazySerDeParams); + serializeWrite = new LazySimpleSerializeWrite(fieldCount, + separator, lazySerDeParams); + } + break; + default: + throw new Error("Unknown serialization type " + serializationType); + } + VectorDeserializeRow vectorDeserializeRow = new VectorDeserializeRow(deserializeRead); + vectorDeserializeRow.init(); + + VectorExtractRowSameBatch vectorExtractRow = new VectorExtractRowSameBatch(); + vectorExtractRow.init(source.typeNames()); + vectorExtractRow.setOneBatch(batch); + + Object[][] randomRows = source.randomRows(100000); + int firstRandomRowIndex = 0; + for (int i = 0; i < randomRows.length; i++) { + Object[] row = randomRows[i]; + + Output output = serializeRow(row, source, serializeWrite); + vectorDeserializeRow.setBytes(output.getData(), 0, output.getLength()); + vectorDeserializeRow.deserializeByValue(batch, batch.size); + batch.size++; + if (batch.size == batch.DEFAULT_SIZE) { + examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex); + firstRandomRowIndex = i + 1; + batch.reset(); + } + } + if (batch.size > 0) { + examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex); + } + } + + public void testVectorSerDeRow() throws Throwable { + + try { + Random r = new 
Random(5678); + for (int c = 0; c < 10; c++) { + testVectorSerializeRow(c, r, SerializationType.BINARY_SORTABLE); + } + for (int c = 0; c < 10; c++) { + testVectorSerializeRow(c, r, SerializationType.LAZY_BINARY); + } + for (int c = 0; c < 10; c++) { + testVectorSerializeRow(c, r, SerializationType.LAZY_SIMPLE); + } + + for (int c = 0; c < 10; c++) { + testVectorDeserializeRow(c, r, SerializationType.BINARY_SORTABLE); + } + for (int c = 0; c < 10; c++) { + testVectorDeserializeRow(c, r, SerializationType.LAZY_BINARY); + } + for (int c = 0; c < 10; c++) { + testVectorDeserializeRow(c, r, SerializationType.LAZY_SIMPLE); + } + + + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +} \ No newline at end of file Index: ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java (working copy) @@ -247,7 +247,7 @@ children5.add(col6Expr); modExpr.setChildren(children5); - VectorizationContext vc = new VectorizationContext(); + VectorizationContext vc = new VectorizationContext("name"); vc.addInitialColumn("col1"); vc.addInitialColumn("col2"); vc.addInitialColumn("col3"); @@ -297,7 +297,7 @@ columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -322,7 +322,7 @@ columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -341,7 +341,7 @@ children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -360,7 +360,7 @@ children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -378,7 +378,7 @@ children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -395,7 +395,7 @@ children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -412,7 +412,7 @@ children1.add(col2Expr); exprDesc.setChildren(children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -434,7 +434,7 @@ List columns = new ArrayList(); columns.add("col1"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); @@ 
-480,7 +480,7 @@ columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -530,7 +530,7 @@ List columns = new ArrayList(); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER); assertEquals(veAnd.getClass(), FilterExprAndExpr.class); assertEquals(veAnd.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class); @@ -555,7 +555,7 @@ orExprDesc.setChildren(children4); //Allocate new Vectorization context to reset the intermediate columns. - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); VectorExpression veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER); assertEquals(veOr.getClass(), FilterExprOrExpr.class); assertEquals(veOr.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class); @@ -596,7 +596,7 @@ columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(notExpr, VectorExpressionDescriptor.Mode.FILTER); @@ -633,7 +633,7 @@ List columns = new ArrayList(); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(isNullExpr, VectorExpressionDescriptor.Mode.FILTER); @@ -674,7 +674,7 @@ List columns = new ArrayList(); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(isNotNullExpr, VectorExpressionDescriptor.Mode.FILTER); @@ -703,7 +703,7 @@ List columns = new ArrayList(); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(scalarMinusConstant, VectorExpressionDescriptor.Mode.PROJECTION); assertEquals(ve.getClass(), LongScalarSubtractLongColumn.class); @@ -726,7 +726,7 @@ columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); @@ -744,7 +744,7 @@ List columns = new ArrayList(); columns.add("col0"); columns.add("col1"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(negExprDesc, VectorExpressionDescriptor.Mode.PROJECTION); @@ -762,7 +762,7 @@ List columns = new ArrayList(); columns.add("col0"); columns.add("col1"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(negExprDesc, 
VectorExpressionDescriptor.Mode.PROJECTION); @@ -787,7 +787,7 @@ List columns = new ArrayList(); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(scalarGreaterColExpr, VectorExpressionDescriptor.Mode.FILTER); assertEquals(FilterLongScalarGreaterLongColumn.class, ve.getClass()); } @@ -810,7 +810,7 @@ List columns = new ArrayList(); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(colEqualScalar, VectorExpressionDescriptor.Mode.FILTER); assertEquals(FilterLongColEqualLongScalar.class, ve.getClass()); } @@ -833,7 +833,7 @@ List columns = new ArrayList(); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(colEqualScalar, VectorExpressionDescriptor.Mode.PROJECTION); assertEquals(LongColEqualLongScalar.class, ve.getClass()); } @@ -850,7 +850,7 @@ List columns = new ArrayList(); columns.add("b"); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); GenericUDF stringLower = new GenericUDFLower(); stringUnary.setGenericUDF(stringLower); @@ -860,7 +860,7 @@ assertEquals(1, ((StringLower) ve).getColNum()); assertEquals(2, ((StringLower) ve).getOutputColumn()); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ExprNodeGenericFuncDesc anotherUnary = new ExprNodeGenericFuncDesc(); anotherUnary.setTypeInfo(TypeInfoFactory.stringTypeInfo); @@ -895,7 +895,7 @@ List columns = new ArrayList(); columns.add("b"); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); // Sin(double) GenericUDFBridge gudfBridge = new GenericUDFBridge("sin", false, UDFSin.class.getName()); @@ -986,7 +986,7 @@ List columns = new ArrayList(); columns.add("b"); columns.add("a"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); //UDFYear GenericUDFBridge gudfBridge = new GenericUDFBridge("year", false, UDFYear.class.getName()); @@ -1024,7 +1024,7 @@ columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterStringColumnBetween); @@ -1050,7 +1050,7 @@ exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterCharColumnBetween); @@ -1075,7 +1075,7 @@ exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1); - vc = new VectorizationContext(columns); + vc = new VectorizationContext("name", columns); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterVarCharColumnBetween); @@ -1144,7 +1144,7 @@ 
columns.add("col0"); columns.add("col1"); columns.add("col2"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterStringColumnInList); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); @@ -1199,7 +1199,7 @@ columns.add("col1"); columns.add("col2"); columns.add("col3"); - VectorizationContext vc = new VectorizationContext(columns); + VectorizationContext vc = new VectorizationContext("name", columns); VectorExpression ve = vc.getVectorExpression(exprDesc); assertTrue(ve instanceof IfExprLongColumnLongColumn); Index: ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (working copy) @@ -66,6 +66,7 @@ import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.InputFormatChecker; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; @@ -98,7 +99,6 @@ import org.apache.hadoop.mapred.RecordWriter; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.StringUtils; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -394,22 +394,9 @@ OrcInputFormat.getInputPaths(conf)); } - static class TestContext extends OrcInputFormat.Context { - List queue = new ArrayList(); - - TestContext(Configuration conf) { - super(conf); - } - - @Override - public void schedule(Runnable runnable) { - queue.add(runnable); - } - } - @Test public void testFileGenerator() throws Exception { - TestContext context = new TestContext(conf); + OrcInputFormat.Context context = new OrcInputFormat.Context(conf); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/a/b/part-00", 1000, new byte[0]), new MockFile("mock:/a/b/part-01", 1000, new byte[0]), @@ -419,21 +406,22 @@ OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b")); - gen.run(); - if (context.getErrors().size() > 0) { - for(Throwable th: context.getErrors()) { - System.out.println(StringUtils.stringifyException(th)); - } - throw new IOException("Errors during file generation"); - } - assertEquals(-1, context.getSchedulers()); - assertEquals(3, context.queue.size()); - assertEquals(new Path("mock:/a/b/part-00"), - ((OrcInputFormat.SplitGenerator) context.queue.get(0)).getPath()); - assertEquals(new Path("mock:/a/b/part-01"), - ((OrcInputFormat.SplitGenerator) context.queue.get(1)).getPath()); - assertEquals(new Path("mock:/a/b/part-04"), - ((OrcInputFormat.SplitGenerator) context.queue.get(2)).getPath()); + SplitStrategy splitStrategy = gen.call(); + assertEquals(true, splitStrategy instanceof OrcInputFormat.BISplitStrategy); + + conf.set("mapreduce.input.fileinputformat.split.maxsize", "500"); + context = new OrcInputFormat.Context(conf); + fs = new MockFileSystem(conf, + new MockFile("mock:/a/b/part-00", 1000, new byte[1000]), + new 
MockFile("mock:/a/b/part-01", 1000, new byte[1000]), + new MockFile("mock:/a/b/_part-02", 1000, new byte[1000]), + new MockFile("mock:/a/b/.part-03", 1000, new byte[1000]), + new MockFile("mock:/a/b/part-04", 1000, new byte[1000])); + gen = new OrcInputFormat.FileGenerator(context, fs, + new MockPath(fs, "mock:/a/b")); + splitStrategy = gen.call(); + assertEquals(true, splitStrategy instanceof OrcInputFormat.ETLSplitStrategy); + } public static class MockBlock { @@ -848,11 +836,10 @@ new MockBlock("host5-1", "host5-2", "host5-3"))); OrcInputFormat.Context context = new OrcInputFormat.Context(conf); OrcInputFormat.SplitGenerator splitter = - new OrcInputFormat.SplitGenerator(context, fs, + new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, fs.getFileStatus(new Path("/a/file")), null, true, - new ArrayList(), true); - splitter.createSplit(0, 200, null); - OrcSplit result = context.getResult(-1); + new ArrayList(), true, null, null)); + OrcSplit result = splitter.createSplit(0, 200, null); assertEquals(0, result.getStart()); assertEquals(200, result.getLength()); assertEquals("mock:/a/file", result.getPath().toString()); @@ -861,15 +848,13 @@ assertEquals("host1-1", locs[0]); assertEquals("host1-2", locs[1]); assertEquals("host1-3", locs[2]); - splitter.createSplit(500, 600, null); - result = context.getResult(-1); + result = splitter.createSplit(500, 600, null); locs = result.getLocations(); assertEquals(3, locs.length); assertEquals("host2-1", locs[0]); assertEquals("host0", locs[1]); assertEquals("host2-3", locs[2]); - splitter.createSplit(0, 2500, null); - result = context.getResult(-1); + result = splitter.createSplit(0, 2500, null); locs = result.getLocations(); assertEquals(1, locs.length); assertEquals("host0", locs[0]); @@ -892,48 +877,36 @@ conf.setInt(OrcInputFormat.MIN_SPLIT_SIZE, 200); OrcInputFormat.Context context = new OrcInputFormat.Context(conf); OrcInputFormat.SplitGenerator splitter = - new OrcInputFormat.SplitGenerator(context, fs, + new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, fs.getFileStatus(new Path("/a/file")), null, true, - new ArrayList(), true); - splitter.run(); - if (context.getErrors().size() > 0) { - for(Throwable th: context.getErrors()) { - System.out.println(StringUtils.stringifyException(th)); - } - throw new IOException("Errors during splitting"); - } - OrcSplit result = context.getResult(0); + new ArrayList(), true, null, null)); + List results = splitter.call(); + OrcSplit result = results.get(0); assertEquals(3, result.getStart()); assertEquals(497, result.getLength()); - result = context.getResult(1); + result = results.get(1); assertEquals(500, result.getStart()); assertEquals(600, result.getLength()); - result = context.getResult(2); + result = results.get(2); assertEquals(1100, result.getStart()); assertEquals(400, result.getLength()); - result = context.getResult(3); + result = results.get(3); assertEquals(1500, result.getStart()); assertEquals(300, result.getLength()); - result = context.getResult(4); + result = results.get(4); assertEquals(1800, result.getStart()); assertEquals(200, result.getLength()); // test min = 0, max = 0 generates each stripe conf.setInt(OrcInputFormat.MIN_SPLIT_SIZE, 0); conf.setInt(OrcInputFormat.MAX_SPLIT_SIZE, 0); context = new OrcInputFormat.Context(conf); - splitter = new OrcInputFormat.SplitGenerator(context, fs, + splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, fs.getFileStatus(new Path("/a/file")), null, true, new 
ArrayList(), - true); - splitter.run(); - if (context.getErrors().size() > 0) { - for(Throwable th: context.getErrors()) { - System.out.println(StringUtils.stringifyException(th)); - } - throw new IOException("Errors during splitting"); - } + true, null, null)); + results = splitter.call(); for(int i=0; i < stripeSizes.length; ++i) { assertEquals("checking stripe " + i + " size", - stripeSizes[i], context.getResult(i).getLength()); + stripeSizes[i], results.get(i).getLength()); } } @@ -1315,9 +1288,6 @@ } mapWork.setPathToAliases(aliasMap); mapWork.setPathToPartitionInfo(partMap); - mapWork.setAllColumnVectorMaps(new HashMap>()); - mapWork.setAllScratchColumnVectorTypeMaps(new HashMap>()); // write the plan out FileSystem localFs = FileSystem.getLocal(conf).getRaw(); Index: ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (working copy) @@ -187,10 +187,11 @@ } private static ByteBuffer byteBuf(int... items) { - ByteBuffer result = ByteBuffer.allocate(items.length); + ByteBuffer result = ByteBuffer.allocate(items.length); for(int item: items) { result.put((byte) item); } + result.flip(); return result; } @@ -703,12 +704,12 @@ assertEquals(0, items.get(0).getPositions(0)); assertEquals(0, items.get(0).getPositions(1)); assertEquals(0, items.get(0).getPositions(2)); - assertEquals(1, + assertEquals(1, items.get(0).getStatistics().getIntStatistics().getMinimum()); index = recordReader.readRowIndex(1, null, null).getRowGroupIndex(); assertEquals(3, index.length); items = index[1].getEntryList(); - assertEquals(2, + assertEquals(2, items.get(0).getStatistics().getIntStatistics().getMaximum()); } Index: ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRowGroupFilter.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRowGroupFilter.java (revision 0) +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRowGroupFilter.java (working copy) @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.io.parquet; + +import com.google.common.collect.Lists; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper; +import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector; +import org.apache.hadoop.hive.ql.plan.*; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import parquet.io.api.RecordConsumer; +import parquet.schema.MessageType; +import parquet.schema.MessageTypeParser; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class TestParquetRowGroupFilter extends AbstractTestParquetDirect { + + JobConf conf; + String columnNames; + String columnTypes; + + @Before + public void initConf() throws Exception { + conf = new JobConf(); + + } + + @Test + public void testRowGroupFilterTakeEffect() throws Exception { + // define schema + columnNames = "intCol"; + columnTypes = "int"; + StructObjectInspector inspector = getObjectInspector(columnNames, columnTypes); + MessageType fileSchema = MessageTypeParser.parseMessageType( + "message hive_schema {\n" + + " optional int32 intCol;\n" + + "}\n" + ); + + conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "intCol"); + conf.set("columns", "intCol"); + conf.set("columns.types", "int"); + + // create Parquet file with specific data + Path testPath = writeDirect("RowGroupFilterTakeEffect", fileSchema, + new DirectWriter() { + @Override + public void write(RecordConsumer consumer) { + for(int i = 0; i < 100; i++) { + consumer.startMessage(); + consumer.startField("int", 0); + consumer.addInteger(i); + consumer.endField("int", 0); + consumer.endMessage(); + } + } + }); + + // > 50 + GenericUDF udf = new GenericUDFOPGreaterThan(); + List children = Lists.newArrayList(); + ExprNodeColumnDesc columnDesc = new ExprNodeColumnDesc(Integer.class, "intCol", "T", false); + ExprNodeConstantDesc constantDesc = new ExprNodeConstantDesc(50); + children.add(columnDesc); + children.add(constantDesc); + ExprNodeGenericFuncDesc genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children); + String searchArgumentStr = Utilities.serializeExpression(genericFuncDesc); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, searchArgumentStr); + + ParquetRecordReaderWrapper recordReader = (ParquetRecordReaderWrapper) + new MapredParquetInputFormat().getRecordReader( + new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null); + + Assert.assertEquals("row group is not filtered correctly", 1, recordReader.getFiltedBlocks().size()); + + // > 100 + constantDesc = new ExprNodeConstantDesc(100); + children.set(1, constantDesc); + genericFuncDesc = new ExprNodeGenericFuncDesc(inspector, udf, children); + searchArgumentStr = Utilities.serializeExpression(genericFuncDesc); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, 
searchArgumentStr); + + recordReader = (ParquetRecordReaderWrapper) + new MapredParquetInputFormat().getRecordReader( + new FileSplit(testPath, 0, fileLength(testPath), (String[]) null), conf, null); + + Assert.assertEquals("row group is not filtered correctly", 0, recordReader.getFiltedBlocks().size()); + } + + private ArrayWritableObjectInspector getObjectInspector(final String columnNames, final String columnTypes) { + List columnTypeList = createHiveTypeInfoFrom(columnTypes); + List columnNameList = createHiveColumnsFrom(columnNames); + StructTypeInfo rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNameList, columnTypeList); + + return new ArrayWritableObjectInspector(rowTypeInfo); + } + + private List createHiveColumnsFrom(final String columnNamesStr) { + List columnNames; + if (columnNamesStr.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNamesStr.split(",")); + } + + return columnNames; + } + + private List createHiveTypeInfoFrom(final String columnsTypeStr) { + List columnTypes; + + if (columnsTypeStr.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr); + } + + return columnTypes; + } +} Index: ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java (working copy) @@ -20,6 +20,7 @@ import static org.apache.hadoop.hive.metastore.MetaStoreUtils.DEFAULT_DATABASE_NAME; +import java.io.StringWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -28,7 +29,6 @@ import java.util.Map; import java.util.regex.Pattern; -import com.google.common.collect.ImmutableMap; import junit.framework.TestCase; import org.apache.hadoop.fs.FileStatus; @@ -56,8 +56,15 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; +import org.apache.log4j.WriterAppender; import org.apache.thrift.protocol.TBinaryProtocol; +import org.junit.Assert; +import com.google.common.collect.ImmutableMap; + /** * TestHive. 
* @@ -234,7 +241,47 @@ } } + /** + * Test logging of timing for metastore api calls + * + * @throws Throwable + */ + public void testMetaStoreApiTiming() throws Throwable { + // set log level to DEBUG, as this is logged at debug level + Logger logger = Logger.getLogger("hive.ql.metadata.Hive"); + Level origLevel = logger.getLevel(); + logger.setLevel(Level.DEBUG); + + // create an appender to capture the logs in a string + StringWriter writer = new StringWriter(); + WriterAppender appender = new WriterAppender(new PatternLayout(), writer); + + try { + logger.addAppender(appender); + + hm.clearMetaCallTiming(); + hm.getAllDatabases(); + hm.dumpAndClearMetaCallTiming("test"); + String logStr = writer.toString(); + String expectedString = "getAllDatabases_()="; + Assert.assertTrue(logStr + " should contain <" + expectedString, + logStr.contains(expectedString)); + + // reset the log buffer, verify new dump without any api call does not contain func + writer.getBuffer().setLength(0); + hm.dumpAndClearMetaCallTiming("test"); + logStr = writer.toString(); + Assert.assertFalse(logStr + " should not contain <" + expectedString, + logStr.contains(expectedString)); + + } finally { + logger.setLevel(origLevel); + logger.removeAppender(appender); + } + } + + /** * Gets a table from the metastore and compares it to the original Table * * @param tbl @@ -704,7 +751,7 @@ index.getIndexName()); assertEquals("Table names don't match for index: " + indexName, tableName, index.getOrigTableName()); - assertEquals("Index table names didn't match for index: " + indexName, qIndexTableName, + assertEquals("Index table names didn't match for index: " + indexName, indexTableName, index.getIndexTableName()); assertEquals("Index handler classes didn't match for index: " + indexName, indexHandlerClass, index.getIndexHandlerClass()); Index: ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java (working copy) @@ -52,7 +52,7 @@ columns.add("col3"); //Generate vectorized expression - vContext = new VectorizationContext(columns); + vContext = new VectorizationContext("name", columns); } @Description(name = "fake", value = "FAKE") Index: ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java (working copy) @@ -17,15 +17,12 @@ */ package org.apache.hadoop.hive.ql.parse; -import static org.junit.Assert.*; - import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; import java.util.Map; -import junit.framework.Assert; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FSDataInputStream; @@ -44,7 +41,6 @@ import org.apache.hadoop.hive.ql.plan.ExplainWork; import org.apache.hadoop.hive.ql.session.SessionState; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; public class TestUpdateDeleteSemanticAnalyzer { @@ -135,7 +131,7 @@ @Test public void testUpdateAllNonPartitioned() throws Exception { try { - ReturnInfo rc = parseAndAnalyze("update T set a = 5", 
"testUpdateAllNonPartitioned"); + ReturnInfo rc = parseAndAnalyze("update T set b = 5", "testUpdateAllNonPartitioned"); LOG.info(explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); } finally { cleanupTables(); @@ -145,7 +141,7 @@ @Test public void testUpdateAllNonPartitionedWhere() throws Exception { try { - ReturnInfo rc = parseAndAnalyze("update T set a = 5 where b > 5", + ReturnInfo rc = parseAndAnalyze("update T set b = 5 where b > 5", "testUpdateAllNonPartitionedWhere"); LOG.info(explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); } finally { @@ -156,7 +152,7 @@ @Test public void testUpdateAllPartitioned() throws Exception { try { - ReturnInfo rc = parseAndAnalyze("update U set a = 5", "testUpdateAllPartitioned"); + ReturnInfo rc = parseAndAnalyze("update U set b = 5", "testUpdateAllPartitioned"); LOG.info(explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); } finally { cleanupTables(); @@ -166,7 +162,7 @@ @Test public void testUpdateAllPartitionedWhere() throws Exception { try { - ReturnInfo rc = parseAndAnalyze("update U set a = 5 where b > 5", + ReturnInfo rc = parseAndAnalyze("update U set b = 5 where b > 5", "testUpdateAllPartitionedWhere"); LOG.info(explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); } finally { @@ -177,7 +173,7 @@ @Test public void testUpdateOnePartition() throws Exception { try { - ReturnInfo rc = parseAndAnalyze("update U set a = 5 where ds = 'today'", + ReturnInfo rc = parseAndAnalyze("update U set b = 5 where ds = 'today'", "testUpdateOnePartition"); LOG.info(explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); } finally { @@ -188,7 +184,7 @@ @Test public void testUpdateOnePartitionWhere() throws Exception { try { - ReturnInfo rc = parseAndAnalyze("update U set a = 5 where ds = 'today' and b > 5", + ReturnInfo rc = parseAndAnalyze("update U set b = 5 where ds = 'today' and b > 5", "testUpdateOnePartitionWhere"); LOG.info(explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); } finally { @@ -266,7 +262,7 @@ db = sem.getDb(); // I have to create the tables here (rather than in setup()) because I need the Hive - // connection, which is conviently created by the semantic analyzer. + // connection, which is conveniently created by the semantic analyzer. 
Map params = new HashMap(1); params.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true"); db.createTable("T", Arrays.asList("a", "b"), null, OrcInputFormat.class, @@ -297,7 +293,7 @@ fs.create(tmp); fs.deleteOnExit(tmp); ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(), - sem.getFetchTask(), astStringTree, sem, true, false, false, false, false); + sem.getFetchTask(), astStringTree, sem, true, false, false, false, false, false, null); ExplainTask task = new ExplainTask(); task.setWork(work); task.initialize(conf, plan, null); Index: ql/src/test/org/apache/hadoop/hive/ql/session/TestAddResource.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/session/TestAddResource.java (revision 0) +++ ql/src/test/org/apache/hadoop/hive/ql/session/TestAddResource.java (working copy) @@ -0,0 +1,318 @@ +package org.apache.hadoop.hive.ql.session; + +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.IOException; +import java.io.Writer; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; +import org.apache.hadoop.hive.ql.session.SessionState.ResourceType; +import org.apache.hadoop.hive.ql.session.SessionState; + +import java.io.BufferedWriter; +import java.io.FileWriter; + + +public class TestAddResource { + + private static final String TEST_JAR_DIR = System.getProperty("test.tmp.dir", ".") + "/"; + private HiveConf conf; + private ResourceType t; + + @Before + public void setup() throws IOException { + conf = new HiveConf(); + t = ResourceType.JAR; + + //Generate test jar files + for (int i = 1; i <= 5; i++) { + Writer output = null; + String dataFile = TEST_JAR_DIR + "testjar" + i + ".jar"; + File file = new File(dataFile); + output = new BufferedWriter(new FileWriter(file)); + output.write("sample"); + output.close(); + } + } + + // Check that all the jars are added to the classpath + @Test + public void testSanity() throws URISyntaxException, IOException { + SessionState ss = Mockito.spy(SessionState.start(conf).get()); + String query = "testQuery"; + + // add all the dependencies to a list + List list = new LinkedList(); + List addList = new LinkedList(); + list.add(new URI(TEST_JAR_DIR + "testjar1.jar")); + list.add(new URI(TEST_JAR_DIR + "testjar2.jar")); + list.add(new URI(TEST_JAR_DIR + "testjar3.jar")); + list.add(new URI(TEST_JAR_DIR + "testjar4.jar")); + list.add(new URI(TEST_JAR_DIR + "testjar5.jar")); + + //return all the dependency urls + Mockito.when(ss.resolveAndDownload(t, query, false)).thenReturn(list); + addList.add(query); + ss.add_resources(t, addList); + Set dependencies = ss.list_resource(t, null); + LinkedList actual = new LinkedList(); + for (String dependency : dependencies) { + actual.add(new URI(dependency)); + } + + // sort both the lists + Collections.sort(list); + Collections.sort(actual); + + assertEquals(list, actual); + ss.close(); + + } + + // add same jar multiple times and check that dependencies are added only once. 
+ @Test + public void testDuplicateAdds() throws URISyntaxException, IOException { + + SessionState ss = Mockito.spy(SessionState.start(conf).get()); + + String query = "testQuery"; + + List list = new LinkedList(); + List addList = new LinkedList(); + list.add(new URI(TEST_JAR_DIR + "testjar1.jar")); + list.add(new URI(TEST_JAR_DIR + "testjar2.jar")); + list.add(new URI(TEST_JAR_DIR + "testjar3.jar")); + list.add(new URI(TEST_JAR_DIR + "testjar4.jar")); + list.add(new URI(TEST_JAR_DIR + "testjar5.jar")); + + Collections.sort(list); + + Mockito.when(ss.resolveAndDownload(t, query, false)).thenReturn(list); + for (int i = 0; i < 10; i++) { + addList.add(query); + } + ss.add_resources(t, addList); + Set dependencies = ss.list_resource(t, null); + LinkedList actual = new LinkedList(); + for (String dependency : dependencies) { + actual.add(new URI(dependency)); + } + + Collections.sort(actual); + assertEquals(list, actual); + ss.close(); + + } + + // test when two jars with shared dependencies are added, the classloader contains union of the dependencies + @Test + public void testUnion() throws URISyntaxException, IOException { + + HiveConf conf = new HiveConf(); + SessionState ss = Mockito.spy(SessionState.start(conf).get()); + ResourceType t = ResourceType.JAR; + String query1 = "testQuery1"; + String query2 = "testQuery2"; + List addList = new LinkedList(); + // add dependencies for the jars + List list1 = new LinkedList(); + List list2 = new LinkedList(); + list1.add(new URI(TEST_JAR_DIR + "testjar1.jar")); + list1.add(new URI(TEST_JAR_DIR + "testjar2.jar")); + list1.add(new URI(TEST_JAR_DIR + "testjar3.jar")); + list1.add(new URI(TEST_JAR_DIR + "testjar4.jar")); + list2.add(new URI(TEST_JAR_DIR + "testjar5.jar")); + list2.add(new URI(TEST_JAR_DIR + "testjar3.jar")); + list2.add(new URI(TEST_JAR_DIR + "testjar4.jar")); + + Mockito.when(ss.resolveAndDownload(t, query1, false)).thenReturn(list1); + Mockito.when(ss.resolveAndDownload(t, query2, false)).thenReturn(list2); + addList.add(query1); + addList.add(query2); + ss.add_resources(t, addList); + + Set dependencies = ss.list_resource(t, null); + LinkedList actual = new LinkedList(); + for (String dependency : dependencies) { + actual.add(new URI(dependency)); + } + List expected = union(list1, list2); + + Collections.sort(expected); + Collections.sort(actual); + + assertEquals(expected, actual); + ss.close(); + + } + + // Test when two jars are added with shared dependencies and one jar is deleted, the shared dependencies should not be deleted + @Test + public void testDeleteJar() throws URISyntaxException, IOException { + SessionState ss = Mockito.spy(SessionState.start(conf).get()); + + String query1 = "testQuery1"; + String query2 = "testQuery2"; + + List list1 = new LinkedList(); + List list2 = new LinkedList(); + List addList = new LinkedList(); + list1.add(new URI(TEST_JAR_DIR + "testjar1.jar")); + list1.add(new URI(TEST_JAR_DIR + "testjar2.jar")); + list1.add(new URI(TEST_JAR_DIR + "testjar3.jar")); + list1.add(new URI(TEST_JAR_DIR + "testjar4.jar")); + list2.add(new URI(TEST_JAR_DIR + "testjar5.jar")); + list2.add(new URI(TEST_JAR_DIR + "testjar3.jar")); + list2.add(new URI(TEST_JAR_DIR + "testjar4.jar")); + + Collections.sort(list1); + Collections.sort(list2); + + Mockito.when(ss.resolveAndDownload(t, query1, false)).thenReturn(list1); + Mockito.when(ss.resolveAndDownload(t, query2, false)).thenReturn(list2); + addList.add(query1); + addList.add(query2); + ss.add_resources(t, addList); + List deleteList = new LinkedList(); + 
deleteList.add(list1.get(0).toString()); + // delete jar and its dependencies added using query1 + ss.delete_resources(t, deleteList); + + Set dependencies = ss.list_resource(t, null); + LinkedList actual = new LinkedList(); + for (String dependency : dependencies) { + actual.add(new URI(dependency)); + } + List expected = list2; + Collections.sort(expected); + Collections.sort(actual); + assertEquals(expected, actual); + + deleteList.clear(); + deleteList.add(list2.get(0).toString()); + // delete remaining jars + ss.delete_resources(t, deleteList); + dependencies = ss.list_resource(t, null); + assertEquals(dependencies.isEmpty(), true); + + ss.close(); + + } + + // same test as above but with 3 jars sharing dependencies + @Test + public void testDeleteJarMultiple() throws URISyntaxException, IOException { + SessionState ss = Mockito.spy(SessionState.start(conf).get()); + + String query1 = "testQuery1"; + String query2 = "testQuery2"; + String query3 = "testQuery3"; + + List list1 = new LinkedList(); + List list2 = new LinkedList(); + List list3 = new LinkedList(); + List addList = new LinkedList(); + list1.add(new URI(TEST_JAR_DIR + "testjar1.jar")); + list1.add(new URI(TEST_JAR_DIR + "testjar2.jar")); + list1.add(new URI(TEST_JAR_DIR + "testjar3.jar")); + list1.add(new URI(TEST_JAR_DIR + "testjar4.jar")); + list2.add(new URI(TEST_JAR_DIR + "testjar5.jar")); + list2.add(new URI(TEST_JAR_DIR + "testjar3.jar")); + list2.add(new URI(TEST_JAR_DIR + "testjar4.jar")); + list3.add(new URI(TEST_JAR_DIR + "testjar4.jar")); + list3.add(new URI(TEST_JAR_DIR + "testjar2.jar")); + list3.add(new URI(TEST_JAR_DIR + "testjar5.jar")); + + Collections.sort(list1); + Collections.sort(list2); + Collections.sort(list3); + + Mockito.when(ss.resolveAndDownload(t, query1, false)).thenReturn(list1); + Mockito.when(ss.resolveAndDownload(t, query2, false)).thenReturn(list2); + Mockito.when(ss.resolveAndDownload(t, query3, false)).thenReturn(list3); + addList.add(query1); + addList.add(query2); + addList.add(query3); + ss.add_resources(t, addList); + + List deleteList = new LinkedList(); + deleteList.add(list1.get(0).toString()); + // delete jar added using query1 + ss.delete_resources(t, deleteList); + + Set dependencies = ss.list_resource(t, null); + LinkedList actual = new LinkedList(); + for (String dependency : dependencies) { + actual.add(new URI(dependency)); + } + List expected = union(list2, list3); + Collections.sort(expected); + Collections.sort(actual); + assertEquals(expected, actual); + + actual.clear(); + expected.clear(); + + deleteList.clear(); + deleteList.add(list2.get(0).toString()); + // delete jars added using query2 + ss.delete_resources(t, deleteList); + dependencies = ss.list_resource(t, null); + actual = new LinkedList(); + for (String dependency : dependencies) { + actual.add(new URI(dependency)); + } + expected = new LinkedList(list3); + Collections.sort(expected); + Collections.sort(actual); + assertEquals(expected, actual); + + actual.clear(); + expected.clear(); + + // delete remaining jars + deleteList.clear(); + deleteList.add(list3.get(0).toString()); + ss.delete_resources(t, deleteList); + + dependencies = ss.list_resource(t, null); + assertEquals(dependencies.isEmpty(), true); + + ss.close(); + } + + @After + public void tearDown() { + // delete sample jars + for (int i = 1; i <= 5; i++) { + String dataFile = TEST_JAR_DIR + "testjar" + i + ".jar"; + + File f = new File(dataFile); + if (!f.delete()) { + throw new RuntimeException("Could not delete the data file"); + } + } + } + + 
private List union(List list1, List list2) { + Set set = new HashSet(); + + set.addAll(list1); + set.addAll(list2); + + return new LinkedList(set); + } + +} Index: ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLastDay.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLastDay.java (revision 1673556) +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLastDay.java (working copy) @@ -17,16 +17,18 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import java.sql.Timestamp; + +import junit.framework.TestCase; + import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Text; -import junit.framework.TestCase; - public class TestGenericUDFLastDay extends TestCase { public void testLastDay() throws HiveException { @@ -65,8 +67,33 @@ runAndVerify("2014-01-32 10:30:45", "2014-02-28", udf); runAndVerify("01/14/2014 10:30:45", null, udf); runAndVerify("2016-02-28T10:30:45", "2016-02-29", udf); + // negative Unix time + runAndVerifyTs("1966-01-31 00:00:01", "1966-01-31", udf); + runAndVerifyTs("1966-01-31 10:00:01", "1966-01-31", udf); + runAndVerifyTs("1966-01-31 23:59:59", "1966-01-31", udf); } + public void testLastDayTs() throws HiveException { + GenericUDFLastDay udf = new GenericUDFLastDay(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; + ObjectInspector[] arguments = { valueOI0 }; + + udf.initialize(arguments); + // positive Unix time + runAndVerifyTs("2014-01-01 10:30:45", "2014-01-31", udf); + runAndVerifyTs("2014-01-14 10:30:45", "2014-01-31", udf); + runAndVerifyTs("2014-01-31 10:30:45.1", "2014-01-31", udf); + runAndVerifyTs("2014-02-02 10:30:45.100", "2014-02-28", udf); + runAndVerifyTs("2014-02-28 10:30:45.001", "2014-02-28", udf); + runAndVerifyTs("2016-02-03 10:30:45.000000001", "2016-02-29", udf); + runAndVerifyTs("2016-02-28 10:30:45", "2016-02-29", udf); + runAndVerifyTs("2016-02-29 10:30:45", "2016-02-29", udf); + // negative Unix time + runAndVerifyTs("1966-01-31 00:00:01", "1966-01-31", udf); + runAndVerifyTs("1966-01-31 10:00:01", "1966-01-31", udf); + runAndVerifyTs("1966-01-31 23:59:59", "1966-01-31", udf); + } + private void runAndVerify(String str, String expResult, GenericUDF udf) throws HiveException { DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new Text(str) : null); @@ -74,4 +101,12 @@ Text output = (Text) udf.evaluate(args); assertEquals("last_day() test ", expResult, output != null ? output.toString() : null); } + + private void runAndVerifyTs(String str, String expResult, GenericUDF udf) throws HiveException { + DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new TimestampWritable( + Timestamp.valueOf(str)) : null); + DeferredObject[] args = { valueObj0 }; + Text output = (Text) udf.evaluate(args); + assertEquals("last_day() test ", expResult, output != null ? 
output.toString() : null);
+  }
 }
Index: ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java
===================================================================
--- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java (revision 0)
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java (working copy)
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMonthsBetween;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.Text;
+
+import junit.framework.TestCase;
+
+public class TestGenericUDFMonthsBetween extends TestCase {
+
+  public void testMonthsBetweenForString() throws HiveException {
+    GenericUDFMonthsBetween udf = new GenericUDFMonthsBetween();
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector[] arguments = { valueOI1, valueOI2 };
+
+    udf.initialize(arguments);
+
+    // test month diff with fraction considering time components
+    runTestStr("1995-02-02", "1995-01-01", 1.03225806, udf);
+    runTestStr("2003-07-17", "2005-07-06", -23.64516129, udf);
+    // test the last day of month
+    runTestStr("2001-06-30", "2000-05-31", 13.0, udf);
+    // test the same day of month
+    runTestStr("2000-06-01", "2004-07-01", -49.0, udf);
+    // test February of non-leap year, 2/28
+    runTestStr("2002-02-28", "2002-03-01", -0.12903226, udf);
+    // test February of non-leap year, 2/31 is viewed as 3/3 due to 3 days diff
+    // from 2/31 to 2/28
+    runTestStr("2002-02-31", "2002-03-01", 0.06451613, udf);
+
+    // test Feb of leap year, 2/29
+    runTestStr("2012-02-29", "2012-03-01", -0.09677419, udf);
+    // test February of leap year, 2/31 is viewed as 3/2 due to 2 days diff from
+    // 2/31 to 2/29
+    runTestStr("2012-02-31", "2012-03-01", 0.03225806, udf);
+
+    // time part
+    // test that there is no leap second adjustment
+    runTestStr("1976-01-01 00:00:00", "1975-12-31 23:59:59", 0.00000037, udf);
+    // test UDF considers the difference in time components date1 and date2
+    runTestStr("1997-02-28 10:30:00", "1996-10-30", 3.94959677, udf);
+    runTestStr("1996-10-30", "1997-02-28 10:30:00", -3.94959677, udf);
+
+    // if both are last day of the month then time part should be ignored
+    runTestStr("2002-03-31", "2002-02-28", 1.0, udf);
+    runTestStr("2002-03-31", "2002-02-28 10:30:00", 1.0, udf);
+    runTestStr("2002-03-31 10:30:00", "2002-02-28", 1.0, udf);
+    // if the same day of the month then time part should be ignored
+    runTestStr("2002-03-24", "2002-02-24", 1.0, udf);
+    runTestStr("2002-03-24", "2002-02-24 10:30:00", 1.0, udf);
+    runTestStr("2002-03-24 10:30:00", "2002-02-24", 1.0, udf);
+
+    // partial time. time part will be skipped
+    runTestStr("1995-02-02 10:39", "1995-01-01", 1.03225806, udf);
+    runTestStr("1995-02-02", "1995-01-01 10:39", 1.03225806, udf);
+    // no leading 0 for month and day should work
+    runTestStr("1995-02-2", "1995-1-01", 1.03225806, udf);
+    runTestStr("1995-2-02", "1995-01-1", 1.03225806, udf);
+    // short year should work
+    runTestStr("495-2-02", "495-01-1", 1.03225806, udf);
+    runTestStr("95-2-02", "95-01-1", 1.03225806, udf);
+    runTestStr("5-2-02", "5-01-1", 1.03225806, udf);
+
+    // Test with null args
+    runTestStr(null, "2002-03-01", null, udf);
+    runTestStr("2002-02-28", null, null, udf);
+    runTestStr(null, null, null, udf);
+
+    // string dates without day should be parsed to null
+    runTestStr("2002-03", "2002-02-24", null, udf);
+    runTestStr("2002-03-24", "2002-02", null, udf);
+  }
+
+  public void testMonthsBetweenForTimestamp() throws HiveException {
+    GenericUDFMonthsBetween udf = new GenericUDFMonthsBetween();
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+    ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+    ObjectInspector[] arguments = { valueOI1, valueOI2 };
+
+    udf.initialize(arguments);
+
+    // test month diff with fraction considering time components
+    runTestTs("1995-02-02 00:00:00", "1995-01-01 00:00:00", 1.03225806, udf);
+    runTestTs("2003-07-17 00:00:00", "2005-07-06 00:00:00", -23.64516129, udf);
+    // test the last day of month
+    runTestTs("2001-06-30 00:00:00", "2000-05-31 00:00:00", 13.0, udf);
+    // test the same day of month
+    runTestTs("2000-06-01 00:00:00", "2004-07-01 00:00:00", -49.0, udf);
+    // test February of non-leap year, 2/28
+    runTestTs("2002-02-28 00:00:00", "2002-03-01 00:00:00", -0.12903226, udf);
+    // test February of non-leap year, 2/31 is viewed as 3/3 due to 3 days diff
+    // from 2/31 to 2/28
+    runTestTs("2002-02-31 00:00:00", "2002-03-01 00:00:00", 0.06451613, udf);
+
+    // test Feb of leap year, 2/29
+    runTestTs("2012-02-29 00:00:00", "2012-03-01 00:00:00", -0.09677419, udf);
+    // test February of leap year, 2/31 is viewed as 3/2 due to 2 days diff from
+    // 2/31 to 2/29
+    runTestTs("2012-02-31 00:00:00", "2012-03-01 00:00:00", 0.03225806, udf);
+
+    // time part
+    // test that there is no leap second adjustment
+    runTestTs("1976-01-01 00:00:00", "1975-12-31 23:59:59", 0.00000037, udf);
+    // test UDF considers the difference in time components date1 and date2
+    runTestTs("1997-02-28 10:30:00", "1996-10-30 00:00:00", 3.94959677, udf);
+    runTestTs("1996-10-30 00:00:00", "1997-02-28 10:30:00", -3.94959677, udf);
+
+    // if both are last day of the month then time part should be ignored
+    runTestTs("2002-03-31 00:00:00", "2002-02-28 00:00:00", 1.0, udf);
+    runTestTs("2002-03-31 00:00:00", "2002-02-28 10:30:00", 1.0, udf);
+    runTestTs("2002-03-31 10:30:00", "2002-02-28 00:00:00", 1.0, udf);
+    // if the same day of the month then time part should be ignored
+    runTestTs("2002-03-24 00:00:00", "2002-02-24 00:00:00", 1.0, udf);
+    runTestTs("2002-03-24 00:00:00", "2002-02-24 10:30:00", 1.0, udf);
+    runTestTs("2002-03-24 10:30:00", "2002-02-24 00:00:00", 1.0, udf);
+
+    // Test with null args
+    runTestTs(null, "2002-03-01 00:00:00", null, udf);
+    runTestTs("2002-02-28 00:00:00", null, null, udf);
+    runTestTs(null, null, null, udf);
+  }
+
+  public void testMonthsBetweenForDate() throws HiveException {
+    GenericUDFMonthsBetween udf = new GenericUDFMonthsBetween();
+    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+    ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+    ObjectInspector[] arguments = { valueOI1, valueOI2 };
+
+    udf.initialize(arguments);
+
+    // test month diff with fraction considering time components
+    runTestDt("1995-02-02", "1995-01-01", 1.03225806, udf);
+    runTestDt("2003-07-17", "2005-07-06", -23.64516129, udf);
+    // test the last day of month
+    runTestDt("2001-06-30", "2000-05-31", 13.0, udf);
+    // test the same day of month
+    runTestDt("2000-06-01", "2004-07-01", -49.0, udf);
+    // test February of non-leap year, 2/28
+    runTestDt("2002-02-28", "2002-03-01", -0.12903226, udf);
+    // test February of non-leap year, 2/31 is viewed as 3/3 due to 3 days diff
+    // from 2/31 to 2/28
+    runTestDt("2002-02-31", "2002-03-01", 0.06451613, udf);
+
+    // test Feb of leap year, 2/29
+    runTestDt("2012-02-29", "2012-03-01", -0.09677419, udf);
+    // test February of leap year, 2/31 is viewed as 3/2 due to 2 days diff from
+    // 2/31 to 2/29
+    runTestDt("2012-02-31", "2012-03-01", 0.03225806, udf);
+    // Test with null args
+    runTestDt(null, "2002-03-01", null, udf);
+    runTestDt("2002-02-28", null, null, udf);
+    runTestDt(null, null, null, udf);
+  }
+
+  protected void runTestStr(String date1, String date2, Double expDiff, GenericUDFMonthsBetween udf)
+      throws HiveException {
+    DeferredJavaObject valueObj1 = new DeferredJavaObject(date1 == null ? null : new Text(date1));
+    DeferredJavaObject valueObj2 = new DeferredJavaObject(date2 == null ? null : new Text(date2));
+    DeferredObject[] args = new DeferredObject[] { valueObj1, valueObj2 };
+    DoubleWritable output = (DoubleWritable) udf.evaluate(args);
+    if (expDiff == null) {
+      assertNull("months_between() test for NULL STRING failed", output);
+    } else {
+      assertNotNull("months_between() test for NOT NULL STRING failed", output);
+      assertEquals("months_between() test for STRING failed", expDiff, output.get(), 0.00000001D);
+    }
+  }
+
+  protected void runTestTs(String ts1, String ts2, Double expDiff, GenericUDFMonthsBetween udf)
+      throws HiveException {
+    TimestampWritable tsWr1 = ts1 == null ? null : new TimestampWritable(Timestamp.valueOf(ts1));
+    TimestampWritable tsWr2 = ts2 == null ?
null : new TimestampWritable(Timestamp.valueOf(ts2)); + DeferredJavaObject valueObj1 = new DeferredJavaObject(tsWr1); + DeferredJavaObject valueObj2 = new DeferredJavaObject(tsWr2); + DeferredObject[] args = new DeferredObject[] { valueObj1, valueObj2 }; + DoubleWritable output = (DoubleWritable) udf.evaluate(args); + if (expDiff == null) { + assertNull("months_between() test for NULL TIMESTAMP failed", output); + } else { + assertNotNull("months_between() test for NOT NULL TIMESTAMP failed", output); + assertEquals("months_between() test for TIMESTAMP failed", expDiff, output.get(), 0.00000001D); + } + } + + protected void runTestDt(String dt1, String dt2, Double expDiff, GenericUDFMonthsBetween udf) + throws HiveException { + DateWritable dtWr1 = dt1 == null ? null : new DateWritable(Date.valueOf(dt1)); + DateWritable dtWr2 = dt2 == null ? null : new DateWritable(Date.valueOf(dt2)); + DeferredJavaObject valueObj1 = new DeferredJavaObject(dtWr1); + DeferredJavaObject valueObj2 = new DeferredJavaObject(dtWr2); + DeferredObject[] args = new DeferredObject[] { valueObj1, valueObj2 }; + DoubleWritable output = (DoubleWritable) udf.evaluate(args); + if (expDiff == null) { + assertNull("months_between() test for NULL DATE failed", output); + } else { + assertNotNull("months_between() test for NOT NULL DATE failed", output); + assertEquals("months_between() test for DATE failed", expDiff, output.get(), 0.00000001D); + } + } +} \ No newline at end of file Index: ql/src/test/queries/clientnegative/authorization_update_noupdatepriv.q =================================================================== --- ql/src/test/queries/clientnegative/authorization_update_noupdatepriv.q (revision 1673556) +++ ql/src/test/queries/clientnegative/authorization_update_noupdatepriv.q (working copy) @@ -9,7 +9,7 @@ -- check update without update priv -create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); +create table auth_noupd(i int, j int) clustered by (j) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); set user.name=user1; update auth_noupd set i = 0 where i > 0; Index: ql/src/test/queries/clientnegative/ivyDownload.q =================================================================== --- ql/src/test/queries/clientnegative/ivyDownload.q (revision 0) +++ ql/src/test/queries/clientnegative/ivyDownload.q (working copy) @@ -0,0 +1 @@ +CREATE TEMPORARY FUNCTION example_add AS 'UDFExampleAdd'; Index: ql/src/test/queries/clientnegative/update_bucket_col.q =================================================================== --- ql/src/test/queries/clientnegative/update_bucket_col.q (revision 0) +++ ql/src/test/queries/clientnegative/update_bucket_col.q (working copy) @@ -0,0 +1,7 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.enforce.bucketing=true; + +create table foo(a int, b varchar(128)) partitioned by (ds string) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); + +update foo set a = 5; \ No newline at end of file Index: ql/src/test/queries/clientpositive/authorization_update.q =================================================================== --- ql/src/test/queries/clientpositive/authorization_update.q (revision 1673556) +++ ql/src/test/queries/clientpositive/authorization_update.q (working copy) @@ -9,7 +9,7 @@ set user.name=user1; -- current user has been set (comment line before the set cmd is resulting in parse error!!) 
-CREATE TABLE t_auth_up(i int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); +CREATE TABLE t_auth_up(i int, j int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); CREATE TABLE t_select(i int); GRANT ALL ON TABLE t_select TO ROLE public; @@ -24,4 +24,4 @@ set user.name=userWIns; -update t_auth_up set i = 0 where i > 0; +update t_auth_up set j = 0 where i > 0; Index: ql/src/test/queries/clientpositive/authorization_update_own_table.q =================================================================== --- ql/src/test/queries/clientpositive/authorization_update_own_table.q (revision 1673556) +++ ql/src/test/queries/clientpositive/authorization_update_own_table.q (working copy) @@ -9,8 +9,8 @@ set user.name=user1; -create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); -update auth_noupd set i = 0 where i > 0; +create table auth_noupd(i int, j int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); +update auth_noupd set j = 0 where i > 0; set user.name=hive_admin_user; set role admin; Index: ql/src/test/queries/clientpositive/avro_comments.q =================================================================== --- ql/src/test/queries/clientpositive/avro_comments.q (revision 0) +++ ql/src/test/queries/clientpositive/avro_comments.q (working copy) @@ -0,0 +1,150 @@ +-- verify Avro columns comments +DROP TABLE IF EXISTS testAvroComments1; + +CREATE TABLE testAvroComments1 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int", + "doc":"Order of playing the role" + }, + { + "name":"first_name", + "type":"string", + "doc":"first name of actor playing role" + }, + { + "name":"last_name", + "type":"string", + "doc":"last name of actor playing role" + }, + { + "name":"extra_field", + "type":"string", + "doc":"an extra field not in the original file", + "default":"fishfingers and custard" + } + ] +}'); + +DESCRIBE testAvroComments1; +DROP TABLE testAvroComments1; + +DROP TABLE IF EXISTS testAvroComments2; +CREATE TABLE testAvroComments2 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int", + "doc":"Order of playing the role" + }, + { + "name":"first_name", + "type":"string" + }, + { + "name":"last_name", + "type":"string", + "doc":"last name of actor playing role" + }, + { + "name":"extra_field", + "type":"string", + "default":"fishfingers and custard" + } + ] +}'); + +DESCRIBE testAvroComments2; +DROP TABLE testAvroComments2; + +DROP TABLE IF EXISTS testAvroComments3; +CREATE TABLE testAvroComments3 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES 
('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int" + }, + { + "name":"first_name", + "type":"string" + }, + { + "name":"last_name", + "type":"string" + }, + { + "name":"extra_field", + "type":"string", + "default":"fishfingers and custard" + } + ] +}'); + +DESCRIBE testAvroComments3; +DROP TABLE testAvroComments3; + +DROP TABLE IF EXISTS testAvroComments4; + +CREATE TABLE testAvroComments4 ( + number int COMMENT "Order of playing the role", + first_name string COMMENT "first name of actor playing role", + last_name string COMMENT "last name of actor playing role", + extra_field string COMMENT "an extra field not in the original file") +STORED AS AVRO; + +DESCRIBE testAvroComments4; +DROP TABLE testAvroComments4; + +DROP TABLE IF EXISTS testAvroComments5; + +CREATE TABLE testAvroComments5 ( + number int COMMENT "Order of playing the role", + first_name string, + last_name string COMMENT "last name of actor playing role", + extra_field string) +STORED AS AVRO; + +DESCRIBE testAvroComments5; +DROP TABLE testAvroComments5; + +DROP TABLE IF EXISTS testAvroComments6; + +CREATE TABLE testAvroComments6 ( + number int, + first_name string, + last_name string, + extra_field string) +STORED AS AVRO; + +DESCRIBE testAvroComments6; +DROP TABLE testAvroComments6; + + Index: ql/src/test/queries/clientpositive/avro_compression_enabled.q =================================================================== --- ql/src/test/queries/clientpositive/avro_compression_enabled.q (revision 1673556) +++ ql/src/test/queries/clientpositive/avro_compression_enabled.q (working copy) @@ -29,7 +29,7 @@ { "name":"extra_field", "type":"string", - "doc:":"an extra field not in the original file", + "doc":"an extra field not in the original file", "default":"fishfingers and custard" } ] Index: ql/src/test/queries/clientpositive/avro_evolved_schemas.q =================================================================== --- ql/src/test/queries/clientpositive/avro_evolved_schemas.q (revision 1673556) +++ ql/src/test/queries/clientpositive/avro_evolved_schemas.q (working copy) @@ -30,7 +30,7 @@ { "name":"extra_field", "type":"string", - "doc:":"an extra field not in the original file", + "doc":"an extra field not in the original file", "default":"fishfingers and custard" } ] Index: ql/src/test/queries/clientpositive/avro_joins.q =================================================================== --- ql/src/test/queries/clientpositive/avro_joins.q (revision 1673556) +++ ql/src/test/queries/clientpositive/avro_joins.q (working copy) @@ -31,7 +31,7 @@ { "name":"extra_field", "type":"string", - "doc:":"an extra field not in the original file", + "doc":"an extra field not in the original file", "default":"fishfingers and custard" } ] Index: ql/src/test/queries/clientpositive/columnstats_part_coltype.q =================================================================== --- ql/src/test/queries/clientpositive/columnstats_part_coltype.q (revision 0) +++ ql/src/test/queries/clientpositive/columnstats_part_coltype.q (working copy) @@ -0,0 +1,71 @@ +-- Test type date, int, and string in partition column +drop table if exists partcolstats; + +create table partcolstats (key int, value string) partitioned by (ds date, hr int, part string); +insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') select key, value from src limit 20; +insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') select 
key, value from src limit 20; +insert into partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') select key, value from src limit 30; +insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') select key, value from src limit 40; +insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') select key, value from src limit 60; + +analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') compute statistics for columns; +describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partA'); +describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partA'); + +describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB'); +describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB'); + +analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns; +describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB'); +describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB'); + +describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA'); +describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA'); + +analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns; +describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA'); +describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA'); + +describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA'); +describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA'); +describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB'); +describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB'); + +analyze table partcolstats partition (ds, hr, part) compute statistics for columns; +describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA'); +describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA'); +describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB'); +describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB'); + +drop table partcolstats; + +-- Test type tinyint, smallint, and bigint in partition column +drop table if exists partcolstatsnum; +create table partcolstatsnum (key int, value string) partitioned by (tint tinyint, sint smallint, bint bigint); +insert into partcolstatsnum partition (tint=100, sint=1000, bint=1000000) select key, value from src limit 30; + +analyze table partcolstatsnum partition (tint=100, sint=1000, bint=1000000) compute statistics for columns; +describe formatted partcolstatsnum.value partition (tint=100, sint=1000, bint=1000000); + +drop table partcolstatsnum; + +-- Test type decimal in partition column +drop table if exists partcolstatsdec; +create table partcolstatsdec (key int, value string) partitioned by (decpart decimal(8,4)); +insert into partcolstatsdec partition (decpart='1000.0001') select key, value from src limit 30; + +analyze table partcolstatsdec partition (decpart='1000.0001') compute statistics for columns; +describe formatted partcolstatsdec.value partition (decpart='1000.0001'); + +drop table partcolstatsdec; + +-- Test type varchar and char 
in partition column +drop table if exists partcolstatschar; +create table partcolstatschar (key int, value string) partitioned by (varpart varchar(5), charpart char(3)); +insert into partcolstatschar partition (varpart='part1', charpart='aaa') select key, value from src limit 30; + +analyze table partcolstatschar partition (varpart='part1', charpart='aaa') compute statistics for columns; +describe formatted partcolstatschar.value partition (varpart='part1', charpart='aaa'); + +drop table partcolstatschar; + Index: ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q =================================================================== --- ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q (revision 1673556) +++ ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q (working copy) @@ -14,12 +14,12 @@ insert into table acid partition(ds='2008-04-08') values("foo", "bar"); select count(*) from acid where ds='2008-04-08'; -explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08'; -update acid set key = 'foo' where value = 'bar' and ds='2008-04-08'; +explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08'; +update acid set value = 'bar' where key = 'foo' and ds='2008-04-08'; select count(*) from acid where ds='2008-04-08'; -explain update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08'); -update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08'); +explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); +update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); select count(*) from acid where ds in ('2008-04-08'); delete from acid where key = 'foo' and ds='2008-04-08'; @@ -36,12 +36,12 @@ insert into table acid partition(ds='2008-04-08') values("foo", "bar"); select count(*) from acid where ds='2008-04-08'; -explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08'; -update acid set key = 'foo' where value = 'bar' and ds='2008-04-08'; +explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08'; +update acid set value = 'bar' where key = 'foo' and ds='2008-04-08'; select count(*) from acid where ds='2008-04-08'; -explain update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08'); -update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08'); +explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); +update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); select count(*) from acid where ds in ('2008-04-08'); delete from acid where key = 'foo' and ds='2008-04-08'; @@ -58,12 +58,12 @@ insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar"); select count(*) from acid where ds='2008-04-08' and hr=11; -explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11; -update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11; +explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; +update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; select count(*) from acid where ds='2008-04-08' and hr=11; -explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11; -update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11; +explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; +update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; 
select count(*) from acid where ds='2008-04-08' and hr>=11; delete from acid where key = 'foo' and ds='2008-04-08' and hr=11; @@ -80,12 +80,12 @@ insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar"); select count(*) from acid where ds='2008-04-08' and hr=11; -explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11; -update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11; +explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; +update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; select count(*) from acid where ds='2008-04-08' and hr=11; -explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11; -update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11; +explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; +update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; select count(*) from acid where ds='2008-04-08' and hr>=11; delete from acid where key = 'foo' and ds='2008-04-08' and hr=11; @@ -103,12 +103,12 @@ insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar"); select count(*) from acid where ds='2008-04-08' and hr=11; -explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11; -update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11; +explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; +update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; select count(*) from acid where ds='2008-04-08' and hr=11; -explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11; -update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11; +explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; +update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; select count(*) from acid where ds='2008-04-08' and hr>=11; delete from acid where key = 'foo' and ds='2008-04-08' and hr=11; Index: ql/src/test/queries/clientpositive/explainuser_1.q =================================================================== --- ql/src/test/queries/clientpositive/explainuser_1.q (revision 0) +++ ql/src/test/queries/clientpositive/explainuser_1.q (working copy) @@ -0,0 +1,667 @@ +set hive.explain.user=true; + +explain create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc; +create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc; + +alter table src_orc_merge_test_part add partition (ds='2012-01-03', ts='2012-01-03+14:46:31'); +desc extended src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31'); + +explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src; +insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src; +explain insert into table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src limit 100; + +explain select count(1) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31'; +explain select sum(hash(key)), sum(hash(value)) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31'; + +alter table 
src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') concatenate; + + +explain select count(1) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31'; +explain select sum(hash(key)), sum(hash(value)) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31'; + +drop table src_orc_merge_test_part; + +set hive.auto.convert.join=true; + +explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +select src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (select * FROM src WHERE src.key < 10) src1 + JOIN + (select * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +) a; + +set hive.cbo.enable=true; +set hive.exec.check.crossproducts=false; + +set hive.stats.fetch.column.stats=true; +set hive.auto.convert.join=false; + +explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key; +explain select x, y, count(*) from (select key, (c_int+c_float+1+2) as x, sum(c_int) as y from cbo_t1 group by c_float, cbo_t1.c_int, key) R group by y, x; + +explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key order by a) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key order by q/10 desc, r asc) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c order by cbo_t3.c_int+c desc, c; + +explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc) cbo_t1 left outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p left outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int % c asc, cbo_t3.c_int desc; + +explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b+c, a desc) cbo_t1 right outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 2) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c; + +explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 
and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by c+a desc) cbo_t1 full outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by p+q desc, r asc) cbo_t2 on cbo_t1.a=p full outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int; + +explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c; + +explain select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc; + +explain select unionsrc.key FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc order by unionsrc.key; + +explain select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc group by unionsrc.key order by unionsrc.key; + +explain select cbo_t1.key from cbo_t1 join cbo_t3 where cbo_t1.key=cbo_t3.key and cbo_t1.key >= 1; +explain select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 left outer join cbo_t2 on cbo_t1.key=cbo_t2.key; +explain select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key; + +explain select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key; +explain select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.key=p join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on cbo_t1.key=a; + +explain select * from (select q, b, cbo_t2.p, cbo_t1.c, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q == 2) and (b > 0 or c_int >= 0)) R where (q + 1 = 2) and (R.b > 0 or c_int >= 0); + +explain select * from (select q, b, cbo_t2.p, cbo_t1.c, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 
right outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q == 2) and (b > 0 or c_int >= 0)) R where (q + 1 = 2) and (R.b > 0 or c_int >= 0); + +explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key order by x limit 1; +explain select x, y, count(*) from (select key, (c_int+c_float+1+2) as x, sum(c_int) as y from cbo_t1 group by c_float, cbo_t1.c_int, key) R group by y, x order by x,y limit 1; +explain select key from(select key from (select key from cbo_t1 limit 5)cbo_t2 limit 5)cbo_t3 limit 5; +explain select key, c_int from(select key, c_int from (select key, c_int from cbo_t1 order by c_int limit 5)cbo_t1 order by c_int limit 5)cbo_t2 order by c_int limit 5; + +explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key order by a limit 5) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key order by q/10 desc, r asc limit 5) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c order by cbo_t3.c_int+c desc, c limit 5; + +explain select cbo_t1.c_int from cbo_t1 left semi join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0); +explain select * from (select c, b, a from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 left semi join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p left semi join cbo_t3 on cbo_t1.a=key where (b + 1 == 2) and (b > 0 or c >= 0)) R where (b + 1 = 2) and (R.b > 0 or c >= 0); +explain select a, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by a+b desc, c asc) cbo_t1 left semi join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by q+r/10 desc, p) cbo_t2 on cbo_t1.a=p left semi join cbo_t3 on cbo_t1.a=key where (b + 1 >= 0) and (b > 0 or a >= 0) group by a, c having a > 0 and (a >=1 or c >= 1) and (a + c) >= 0 order by c, a; + +explain select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1; + +explain select null from cbo_t1; + +explain select key from cbo_t1 where c_int = -6 or c_int = +6; + +explain select count(cbo_t1.dt) from cbo_t1 join cbo_t2 on cbo_t1.dt = cbo_t2.dt where cbo_t1.dt = '2014' ; + +explain select * +from src_cbo b +where not exists + (select distinct a.key + from src_cbo a + where b.value = a.value and a.value > 'val_2' + ) +; + +explain select * +from src_cbo b +group by key, value +having not exists + (select a.key + from 
src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +; + +create view cv1 as +select * +from src_cbo b +where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +; + +explain select * from cv1; + +explain select * +from (select * + from src_cbo b + where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') + ) a +; + + +explain select * +from src_cbo +where src_cbo.key in (select key from src_cbo s1 where s1.key > '9') +; + + +explain select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +; + +explain select key, value, count(*) +from src_cbo b +where b.key in (select key from src_cbo where src_cbo.key > '8') +group by key, value +having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key ) +; + +explain select p_mfgr, p_name, avg(p_size) +from part +group by p_mfgr, p_name +having p_name in + (select first_value(p_name) over(partition by p_mfgr order by p_size) from part) +; + +explain select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) order by key +; + +explain select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +; + +explain select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) order by p_name +; + +explain select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) + order by b.p_mfgr +; + +explain select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from cbo_t1; +explain select * from (select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from cbo_t1) cbo_t1; +explain select i, a, h, b, c, d, e, f, g, a as x, a +1 as y from (select max(c_int) over (partition by key order by value range UNBOUNDED PRECEDING) a, min(c_int) over (partition by key order by value range current row) b, count(c_int) over(partition by key order by value range 1 PRECEDING) c, avg(value) over (partition by key order by value range between unbounded preceding and unbounded following) d, sum(value) over (partition by key order by value range between unbounded preceding and current row) e, avg(c_float) over (partition by key order by value range between 1 preceding and unbounded following) f, sum(c_float) over (partition by key order by value range between 1 preceding and current row) g, max(c_float) over (partition by key order by value range between 1 preceding and unbounded following) h, min(c_float) over (partition by key order by value range between 1 preceding and 1 following) i from cbo_t1) 
cbo_t1; +explain select *, rank() over(partition by key order by value) as rr from src1; + + +set hive.auto.convert.join=false; +set hive.optimize.correlation=false; +explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +set hive.optimize.correlation=true; +explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +set hive.auto.convert.join=true; +set hive.optimize.correlation=true; +explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +set hive.auto.convert.join=false; +set hive.optimize.correlation=false; +explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x LEFT SEMI JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +explain create table abcd (a int, b int, c int, d int); +create table abcd (a int, b int, c int, d int); +LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd; + +set hive.map.aggr=true; +explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; + +set hive.map.aggr=false; +explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; + +explain create table src_rc_merge_test(key int, value string) stored as rcfile; +create table src_rc_merge_test(key int, value string) stored as rcfile; + +load data local inpath '../../data/files/smbbucket_1.rc' into table src_rc_merge_test; + +set hive.exec.compress.output = true; + +explain create table tgt_rc_merge_test(key int, value string) stored as rcfile; +create table tgt_rc_merge_test(key int, value string) stored as rcfile; +insert into table tgt_rc_merge_test select * from src_rc_merge_test; + +show table extended like `tgt_rc_merge_test`; + +explain select count(1) from tgt_rc_merge_test; +explain select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test; + +alter table tgt_rc_merge_test concatenate; + +show table extended like `tgt_rc_merge_test`; + +explain select count(1) from tgt_rc_merge_test; +explain select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test; + +drop table src_rc_merge_test; +drop table tgt_rc_merge_test; + +explain select src.key from src cross join src src2; + + +explain create table nzhang_Tmp(a int, b string); +create table nzhang_Tmp(a int, b string); + +explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10; +create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10; + + +explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10; + +create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10; + +explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2; + +create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.enforce.bucketing=true; + +explain create temporary 
table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); +create temporary table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); + +set hive.map.aggr=false; +set hive.groupby.skewindata=true; + + +explain +select src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (select * FROM src WHERE src.key < 10) src1 + JOIN + (select * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2; + + +CREATE TABLE myinput1(key int, value int); +LOAD DATA LOCAL INPATH '../../data/files/in8.txt' INTO TABLE myinput1; + +explain select * from myinput1 a join myinput1 b on a.key<=>b.value; + +explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key; + +explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key; + +explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value; + +explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value; + +explain select * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value; +explain select * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; +explain select * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value; + +explain select /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value; + +CREATE TABLE smb_input(key int, value int); +LOAD DATA LOCAL INPATH '../../data/files/in4.txt' into table smb_input; +LOAD DATA LOCAL INPATH '../../data/files/in5.txt' into table smb_input; + +set hive.enforce.sorting = true; +set hive.enforce.bucketing = true; + +CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS; + +from smb_input +insert overwrite table smb_input1 select * +insert overwrite table smb_input2 select *; + +SET hive.optimize.bucketmapjoin = true; +SET hive.optimize.bucketmapjoin.sortedmerge = true; +SET hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; + +explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key; +explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key AND a.value <=> b.value; +explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key <=> b.key; +explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key; +explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key <=> b.key; + +drop table sales; +drop table things; + +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; + +CREATE TABLE sales (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; + +CREATE TABLE things (id INT, name STRING) partitioned by (ds string) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; + +load data local inpath '../../data/files/sales.txt' INTO TABLE sales; +load data local inpath '../../data/files/things.txt' INTO TABLE things partition(ds='2011-10-23'); +load data local inpath '../../data/files/things2.txt' INTO TABLE things partition(ds='2011-10-24'); + +explain select name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id); + +drop table sales; +drop table 
things; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450'; + +set hive.mapjoin.optimized.hashtable=false; + +explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450'; + +set hive.mapjoin.optimized.hashtable=true; + +explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450'; + +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ); + +explain +select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +; + +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc; + +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +; + +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +; + +explain +select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey; + + +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc); + +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name); + +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +; + +explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition 
by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr DESC, p_name +))); + +explain +select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 ; + + +explain +select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +; + + +explain create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +sum(p_retailprice) as s +from part +group by p_mfgr, p_brand; + +CREATE TABLE part_4( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +s DOUBLE); + +CREATE TABLE part_5( +p_mfgr STRING, +p_name STRING, +p_size INT, +s2 INT, +r INT, +dr INT, +cud DOUBLE, +fv1 INT); + +explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + + +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) ; + +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) ; + +explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over 
(partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)); + +explain select distinct src.* from src; + +explain select explode(array('a', 'b')); + +set hive.optimize.skewjoin = true; +set hive.skewjoin.key = 2; + +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE T4(key STRING, val STRING) STORED AS TEXTFILE; +CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T4; + + +explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 select src1.key, src2.value; + +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 select src1.key, src2.value; + + + +explain +select /*+ STREAMTABLE(a) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key; + +explain +select /*+ STREAMTABLE(a,c) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key; + +explain FROM T1 a JOIN src c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); +FROM T1 a JOIN src c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); + +explain +select * FROM +(select src.* FROM src) x +JOIN +(select src.* FROM src) Y +ON (x.key = Y.key); + + +explain select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.val; + +explain select sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.key; + +explain select count(1) from T1 a join T1 b on a.key = b.key; + +explain FROM T1 a LEFT OUTER JOIN T2 c ON c.key+1=a.key select sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); + +explain FROM T1 a RIGHT OUTER JOIN T2 c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); + +explain FROM T1 a FULL OUTER JOIN T2 c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)); + +explain select /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k left outer join T1 v on k.key+1=v.key; Index: ql/src/test/queries/clientpositive/explainuser_2.q =================================================================== --- ql/src/test/queries/clientpositive/explainuser_2.q (revision 0) +++ ql/src/test/queries/clientpositive/explainuser_2.q (working copy) @@ -0,0 +1,307 @@ +set hive.explain.user=true; + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; + +CREATE TABLE ss(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE; + +CREATE TABLE sr(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE; + +CREATE TABLE cs(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE; + +INSERT OVERWRITE TABLE ss +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + +INSERT OVERWRITE TABLE sr +SELECT 
x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=12); + +INSERT OVERWRITE TABLE cs +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08'); + + +ANALYZE TABLE ss COMPUTE STATISTICS; +ANALYZE TABLE ss COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3; + +ANALYZE TABLE sr COMPUTE STATISTICS; +ANALYZE TABLE sr COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3; + +ANALYZE TABLE cs COMPUTE STATISTICS; +ANALYZE TABLE cs COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3; + +set hive.auto.convert.join=false; + +EXPLAIN +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + +EXPLAIN +select +ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3) +FROM +ss,sr,cs,src d1,src d2,src d3,src1,srcpart +where + ss.k1 = d1.key +and sr.k1 = d2.key +and cs.k1 = d3.key +and ss.k2 = sr.k2 +and ss.k3 = sr.k3 +and ss.v1 = src1.value +and ss.v2 = srcpart.value +and sr.v2 = cs.v2 +and sr.v3 = cs.v3 +and ss.v3='ssv3' +and sr.v1='srv1' +and src1.key = 'src1key' +and srcpart.key = 'srcpartkey' +and d1.value = 'd1value' +and d2.value in ('2000Q1','2000Q2','2000Q3') +and d3.value in ('2000Q1','2000Q2','2000Q3') +group by +ss.k1,sr.k2,cs.k3 +order by +ss.k1,sr.k2,cs.k3 +limit 100; + +explain +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value); + + +explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value); + + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + + +EXPLAIN +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + +EXPLAIN +select +ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3) +FROM +ss,sr,cs,src d1,src d2,src d3,src1,srcpart +where + ss.k1 = d1.key +and sr.k1 = d2.key +and cs.k1 = d3.key +and ss.k2 = sr.k2 +and ss.k3 = sr.k3 +and ss.v1 = src1.value +and ss.v2 = srcpart.value +and sr.v2 = cs.v2 +and sr.v3 = cs.v3 +and ss.v3='ssv3' +and sr.v1='srv1' +and src1.key = 'src1key' +and srcpart.key = 'srcpartkey' +and d1.value = 'd1value' +and d2.value in ('2000Q1','2000Q2','2000Q3') +and d3.value in ('2000Q1','2000Q2','2000Q3') +group by +ss.k1,sr.k2,cs.k3 +order by +ss.k1,sr.k2,cs.k3 +limit 100; + +explain +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 
union select * from src)z ON (x.value = z.value); + + +explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value); + + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; + +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + +set hive.enforce.bucketing=true; +set hive.enforce.sorting = true; +set hive.optimize.bucketingsorting=false; +insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part; + +CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +CREATE TABLE tab2(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab2 partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +set hive.convert.join.bucket.mapjoin.tez = false; +set hive.auto.convert.sortmerge.join = true; + +set hive.auto.convert.join.noconditionaltask.size=500; + + +explain +select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key; + +explain +select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value; + +explain +select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key; + +explain +select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key join tab2 s2 on s1.value=s2.value; + +explain +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) 
a join tab_part b on (a.key = b.key); + + +explain +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) a join tab_part b on (a.key = b.key);set hive.explain.user=true; + +explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union all select * from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value); + +explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value); + +CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE; +CREATE TABLE b(key STRING, value STRING) STORED AS TEXTFILE; +CREATE TABLE c(key STRING, value STRING) STORED AS TEXTFILE; + +explain +from +( +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union all select * from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +) tmp +INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value; + +explain +FROM +( +SELECT x.key as key, y.value as value from src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key as key, y.value as value from src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key as key, y.value as value from src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value) +) tmp +INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value; + + +CREATE TABLE DEST1(key STRING, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE; + +explain +FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION DISTINCT + 
select s2.key as key, s2.value as value from src s2) unionsrc +INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key +INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value; + +EXPLAIN FROM UNIQUEJOIN PRESERVE src a (a.key), PRESERVE src1 b (b.key), PRESERVE srcpart c (c.key) SELECT a.key, b.key, c.key; + +set hive.entity.capture.transform=true; + +EXPLAIN +SELECT +TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) +FROM src a join src b +on a.key = b.key; + Index: ql/src/test/queries/clientpositive/extrapolate_part_stats_partial_ndv.q =================================================================== --- ql/src/test/queries/clientpositive/extrapolate_part_stats_partial_ndv.q (revision 0) +++ ql/src/test/queries/clientpositive/extrapolate_part_stats_partial_ndv.q (working copy) @@ -0,0 +1,99 @@ +set hive.metastore.stats.ndv.densityfunction=true; +set hive.stats.fetch.column.stats=true; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +drop table if exists ext_loc; + +create table ext_loc ( + state string, + locid double, + cnt decimal, + zip int, + year string +) row format delimited fields terminated by '|' stored as textfile; + +LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_partial_ndv.txt' OVERWRITE INTO TABLE ext_loc; + +drop table if exists loc_orc_1d; + +create table loc_orc_1d ( + state string, + locid double, + cnt decimal, + zip int +) partitioned by(year string) stored as orc; + +insert overwrite table loc_orc_1d partition(year) select * from ext_loc; + +analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid,cnt,zip; + +analyze table loc_orc_1d partition(year='2002') compute statistics for columns state,locid,cnt,zip; + +describe formatted loc_orc_1d.state PARTITION(year='2001'); + +describe formatted loc_orc_1d.state PARTITION(year='2002'); + +describe formatted loc_orc_1d.locid PARTITION(year='2001'); + +describe formatted loc_orc_1d.locid PARTITION(year='2002'); + +describe formatted loc_orc_1d.cnt PARTITION(year='2001'); + +describe formatted loc_orc_1d.cnt PARTITION(year='2002'); + +describe formatted loc_orc_1d.zip PARTITION(year='2001'); + +describe formatted loc_orc_1d.zip PARTITION(year='2002'); + +explain extended select state,locid,cnt,zip from loc_orc_1d; + +analyze table loc_orc_1d partition(year='2000') compute statistics for columns state,locid,cnt,zip; + +analyze table loc_orc_1d partition(year='2003') compute statistics for columns state,locid,cnt,zip; + +describe formatted loc_orc_1d.state PARTITION(year='2000'); + +describe formatted loc_orc_1d.state PARTITION(year='2003'); + +describe formatted loc_orc_1d.locid PARTITION(year='2000'); + +describe formatted loc_orc_1d.locid PARTITION(year='2003'); + +describe formatted loc_orc_1d.cnt PARTITION(year='2000'); + +describe formatted loc_orc_1d.cnt PARTITION(year='2003'); + +describe formatted loc_orc_1d.zip PARTITION(year='2000'); + +describe formatted loc_orc_1d.zip PARTITION(year='2003'); + +explain extended select state,locid,cnt,zip from loc_orc_1d; + +drop table if exists loc_orc_2d; + +create table loc_orc_2d ( + state string, + locid int, + cnt decimal +) partitioned by(zip int, year string) stored as orc; + +insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc; + +analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics 
for columns state,locid,cnt; + +analyze table loc_orc_2d partition(zip=94087, year='2002') compute statistics for columns state,locid,cnt; + +describe formatted loc_orc_2d.state partition(zip=94086, year='2001'); + +describe formatted loc_orc_2d.state partition(zip=94087, year='2002'); + +describe formatted loc_orc_2d.locid partition(zip=94086, year='2001'); + +describe formatted loc_orc_2d.locid partition(zip=94087, year='2002'); + +describe formatted loc_orc_2d.cnt partition(zip=94086, year='2001'); + +describe formatted loc_orc_2d.cnt partition(zip=94087, year='2002'); + +explain extended select state,locid,cnt,zip from loc_orc_2d; Index: ql/src/test/queries/clientpositive/groupby3_map.q =================================================================== --- ql/src/test/queries/clientpositive/groupby3_map.q (revision 1673556) +++ ql/src/test/queries/clientpositive/groupby3_map.q (working copy) @@ -29,6 +29,16 @@ variance(substr(src.value,5)), var_samp(substr(src.value,5)); -SELECT dest1.* FROM dest1; +SELECT +c1, +c2, +round(c3, 11) c3, +c4, +c5, +round(c6, 11) c6, +round(c7, 11) c7, +round(c8, 5) c8, +round(c9, 9) c9 +FROM dest1; Index: ql/src/test/queries/clientpositive/interval_udf.q =================================================================== --- ql/src/test/queries/clientpositive/interval_udf.q (revision 0) +++ ql/src/test/queries/clientpositive/interval_udf.q (working copy) @@ -0,0 +1,8 @@ + +select + year(iym), month(iym), day(idt), hour(idt), minute(idt), second(idt) +from ( + select interval '1-2' year to month iym, interval '3 4:5:6.789' day to second idt + from src limit 1 +) q; + Index: ql/src/test/queries/clientpositive/ivyDownload.q =================================================================== --- ql/src/test/queries/clientpositive/ivyDownload.q (revision 0) +++ ql/src/test/queries/clientpositive/ivyDownload.q (working copy) @@ -0,0 +1,26 @@ +ADD JAR ivy://:udfexampleadd:1.0; + +CREATE TEMPORARY FUNCTION example_add AS 'UDFExampleAdd'; + +EXPLAIN +SELECT example_add(1, 2), + example_add(1, 2, 3), + example_add(1, 2, 3, 4), + example_add(1.1, 2.2), + example_add(1.1, 2.2, 3.3), + example_add(1.1, 2.2, 3.3, 4.4), + example_add(1, 2, 3, 4.4) +FROM src LIMIT 1; + +SELECT example_add(1, 2), + example_add(1, 2, 3), + example_add(1, 2, 3, 4), + example_add(1.1, 2.2), + example_add(1.1, 2.2, 3.3), + example_add(1.1, 2.2, 3.3, 4.4), + example_add(1, 2, 3, 4.4) +FROM src LIMIT 1; + +DROP TEMPORARY FUNCTION example_add; + +DELETE JAR ivy://:udfexampleadd:1.0; Index: ql/src/test/queries/clientpositive/leadlag.q =================================================================== --- ql/src/test/queries/clientpositive/leadlag.q (revision 1673556) +++ ql/src/test/queries/clientpositive/leadlag.q (working copy) @@ -36,7 +36,10 @@ from part window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) ; +set hive.cbo.enable=false; -- 6. testRankInLead +-- disable cbo because of CALCITE-653 + select p_mfgr, p_name, p_size, r1, lead(r1,1,r1) over (distribute by p_mfgr sort by p_name) as deltaRank from ( @@ -45,6 +48,7 @@ from part ) a; +set hive.cbo.enable=true; -- 7. 
testLeadWithPTF select p_mfgr, p_name, rank() over(distribute by p_mfgr sort by p_name) as r, Index: ql/src/test/queries/clientpositive/non_native_window_udf.q =================================================================== --- ql/src/test/queries/clientpositive/non_native_window_udf.q (revision 0) +++ ql/src/test/queries/clientpositive/non_native_window_udf.q (working copy) @@ -0,0 +1,11 @@ + +create temporary function mylastval as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue'; + +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l, +mylastval(p_size, false) over w1 as m +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + Index: ql/src/test/queries/clientpositive/parquet_columnar.q =================================================================== --- ql/src/test/queries/clientpositive/parquet_columnar.q (revision 1673556) +++ ql/src/test/queries/clientpositive/parquet_columnar.q (working copy) @@ -5,8 +5,8 @@ DROP TABLE IF EXISTS parquet_columnar_renamed; CREATE TABLE parquet_columnar_access_stage ( - s string, - i int, + s string, + i int, f float ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'; @@ -15,12 +15,14 @@ s string, x int, y int, - f float + f float, + address struct<intVals:int,strVals:string> ) STORED AS PARQUET; LOAD DATA LOCAL INPATH '../../data/files/parquet_columnar.txt' OVERWRITE INTO TABLE parquet_columnar_access_stage; -INSERT OVERWRITE TABLE parquet_columnar_access SELECT s, i, (i + 1), f FROM parquet_columnar_access_stage; +INSERT OVERWRITE TABLE parquet_columnar_access SELECT s, i, (i + 1), f, named_struct('intVals', +i,'strVals',s) FROM parquet_columnar_access_stage; SELECT * FROM parquet_columnar_access; ALTER TABLE parquet_columnar_access REPLACE COLUMNS (s1 string, x1 int, y1 int, f1 float); Index: ql/src/test/queries/clientpositive/parquet_schema_evolution.q =================================================================== --- ql/src/test/queries/clientpositive/parquet_schema_evolution.q (revision 0) +++ ql/src/test/queries/clientpositive/parquet_schema_evolution.q (working copy) @@ -0,0 +1,27 @@ +-- Some tables might have extra columns and struct elements in their schema that are not present in the Parquet file schema; +-- This is called 'schema evolution', as the Parquet file was written before the new columns existed; +-- Hive should support such a schema and return NULL values for the missing columns; + +DROP TABLE NewStructField; +DROP TABLE NewStructFieldTable; + +CREATE TABLE NewStructField(a struct<a1:map<string,string>, a2:struct<e1:int>>) STORED AS PARQUET; + +INSERT OVERWRITE TABLE NewStructField SELECT named_struct('a1', map('k1','v1'), 'a2', named_struct('e1',5)) FROM srcpart LIMIT 5; + +DESCRIBE NewStructField; +SELECT * FROM NewStructField; + +-- Adds new fields to the struct types +ALTER TABLE NewStructField REPLACE COLUMNS (a struct<a1:map<string,string>, a2:struct<e1:int, e2:string>, a3:int>, b int); + +DESCRIBE NewStructField; +SELECT * FROM NewStructField; + +-- Makes sure that new parquet tables contain the new struct field +CREATE TABLE NewStructFieldTable STORED AS PARQUET AS SELECT * FROM NewStructField; +DESCRIBE NewStructFieldTable; +SELECT * FROM NewStructFieldTable; + +DROP TABLE NewStructField; +DROP TABLE NewStructFieldTable; \ No newline at end of file Index: ql/src/test/queries/clientpositive/parquet_table_with_subschema.q =================================================================== --- 
ql/src/test/queries/clientpositive/parquet_table_with_subschema.q (revision 0) +++ ql/src/test/queries/clientpositive/parquet_table_with_subschema.q (working copy) @@ -0,0 +1,13 @@ +-- Sometimes, the user wants to create a table from just a portion of the file schema; +-- This test makes sure that this scenario works; + +DROP TABLE test; + +-- Current file schema is: (id int, name string, address struct); +-- Creates a table from just a portion of the file schema, including struct elements (test lower/upper case as well) +CREATE TABLE test (Name string, address struct) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/HiveGroup.parquet' OVERWRITE INTO TABLE test; +SELECT * FROM test; + +DROP TABLE test; \ No newline at end of file Index: ql/src/test/queries/clientpositive/tez_join.q =================================================================== --- ql/src/test/queries/clientpositive/tez_join.q (revision 1673556) +++ ql/src/test/queries/clientpositive/tez_join.q (working copy) @@ -24,20 +24,3 @@ (select rt2.id from (select t2.id, t2.od from t2 order by t2.id, t2.od) rt2) vt2 where vt1.id=vt2.id; - -explain -select vt1.id from -(select rt1.id from -(select t1.id, t1.od, count(*) from t1 group by t1.id, t1.od) rt1) vt1 -join -(select rt2.id from -(select t2.id, t2.od, count(*) from t2 group by t2.id, t2.od) rt2) vt2 -where vt1.id=vt2.id; - -select vt1.id from -(select rt1.id from -(select t1.id, t1.od, count(*) from t1 group by t1.id, t1.od) rt1) vt1 -join -(select rt2.id from -(select t2.id, t2.od, count(*) from t2 group by t2.id, t2.od) rt2) vt2 -where vt1.id=vt2.id; Index: ql/src/test/queries/clientpositive/tez_smb_1.q =================================================================== --- ql/src/test/queries/clientpositive/tez_smb_1.q (revision 1673556) +++ ql/src/test/queries/clientpositive/tez_smb_1.q (working copy) @@ -33,3 +33,21 @@ explain select count(*) from tab s1 join tab s3 on s1.key=s3.key; +set hive.auto.convert.join=false; + +explain +select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id; + +select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id; Index: ql/src/test/queries/clientpositive/tez_smb_main.q =================================================================== --- ql/src/test/queries/clientpositive/tez_smb_main.q (revision 1673556) +++ ql/src/test/queries/clientpositive/tez_smb_main.q (working copy) @@ -82,3 +82,20 @@ select s2.key as key, s2.value as value from tab s2 ) a join tab_part b on (a.key = b.key); +explain +select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id; + +select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id; + Index: ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q =================================================================== --- 
ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q (revision 1673556) +++ ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q (working copy) @@ -88,11 +88,11 @@ set hive.cbo.enable=false; -- NaN -explain +explain select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket; select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket; -- with CBO -explain +explain select percentile_approx(key, 0.5) from bucket; -select percentile_approx(key, 0.5) from bucket; +select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket; Index: ql/src/test/queries/clientpositive/udf_format_number.q =================================================================== --- ql/src/test/queries/clientpositive/udf_format_number.q (revision 1673556) +++ ql/src/test/queries/clientpositive/udf_format_number.q (working copy) @@ -71,3 +71,9 @@ format_number(-12332.2BD, 0), format_number(CAST(12332.567 AS DECIMAL(8, 1)), 4) FROM src tablesample (1 rows); + +-- nulls +SELECT + format_number(cast(null as int), 0), + format_number(12332.123456BD, cast(null as int)), + format_number(cast(null as int), cast(null as int)); Index: ql/src/test/queries/clientpositive/udf_months_between.q =================================================================== --- ql/src/test/queries/clientpositive/udf_months_between.q (revision 0) +++ ql/src/test/queries/clientpositive/udf_months_between.q (working copy) @@ -0,0 +1,88 @@ +describe function months_between; +desc function extended months_between; + +--test string format +explain select months_between('1995-02-02', '1995-01-01'); + +select + months_between('1995-02-02', '1995-01-01'), + months_between('2003-07-17', '2005-07-06'), + months_between('2001-06-30', '2000-05-31'), + months_between('2000-06-01', '2004-07-01'), + months_between('2002-02-28', '2002-03-01'), + months_between('2002-02-31', '2002-03-01'), + months_between('2012-02-29', '2012-03-01'), + months_between('2012-02-31', '2012-03-01'), + months_between('1976-01-01 00:00:00', '1975-12-31 23:59:59'), + months_between('1976-01-01', '1975-12-31 23:59:59'), + months_between('1997-02-28 10:30:00', '1996-10-30'), + -- if both are last day of the month then time part should be ignored + months_between('2002-03-31', '2002-02-28'), + months_between('2002-03-31', '2002-02-28 10:30:00'), + months_between('2002-03-31 10:30:00', '2002-02-28'), + -- if the same day of the month then time part should be ignored + months_between('2002-03-24', '2002-02-24'), + months_between('2002-03-24', '2002-02-24 10:30:00'), + months_between('2002-03-24 10:30:00', '2002-02-24'), + -- partial time. 
time part will be skipped + months_between('1995-02-02 10:39', '1995-01-01'), + months_between('1995-02-02', '1995-01-01 10:39'), + -- no leading 0 for month and day should work + months_between('1995-02-2', '1995-1-01'), + months_between('1995-2-02', '1995-01-1'), + -- short year should work + months_between('495-2-02', '495-01-1'), + months_between('95-2-02', '95-01-1'), + months_between('5-2-02', '5-01-1'); + +--test timestamp format +select + months_between(cast('1995-02-02 00:00:00' as timestamp), cast('1995-01-01 00:00:00' as timestamp)), + months_between(cast('2003-07-17 00:00:00' as timestamp), cast('2005-07-06 00:00:00' as timestamp)), + months_between(cast('2001-06-30 00:00:00' as timestamp), cast('2000-05-31 00:00:00' as timestamp)), + months_between(cast('2000-06-01 00:00:00' as timestamp), cast('2004-07-01 00:00:00' as timestamp)), + months_between(cast('2002-02-28 00:00:00' as timestamp), cast('2002-03-01 00:00:00' as timestamp)), + months_between(cast('2002-02-31 00:00:00' as timestamp), cast('2002-03-01 00:00:00' as timestamp)), + months_between(cast('2012-02-29 00:00:00' as timestamp), cast('2012-03-01 00:00:00' as timestamp)), + months_between(cast('2012-02-31 00:00:00' as timestamp), cast('2012-03-01 00:00:00' as timestamp)), + months_between(cast('1976-01-01 00:00:00' as timestamp), cast('1975-12-31 23:59:59' as timestamp)), + months_between(cast('1976-01-01' as date), cast('1975-12-31 23:59:59' as timestamp)), + months_between(cast('1997-02-28 10:30:00' as timestamp), cast('1996-10-30' as date)), + -- if both are last day of the month then time part should be ignored + months_between(cast('2002-03-31 00:00:00' as timestamp), cast('2002-02-28 00:00:00' as timestamp)), + months_between(cast('2002-03-31 00:00:00' as timestamp), cast('2002-02-28 10:30:00' as timestamp)), + months_between(cast('2002-03-31 10:30:00' as timestamp), cast('2002-02-28 00:00:00' as timestamp)), + -- if the same day of the month then time part should be ignored + months_between(cast('2002-03-24 00:00:00' as timestamp), cast('2002-02-24 00:00:00' as timestamp)), + months_between(cast('2002-03-24 00:00:00' as timestamp), cast('2002-02-24 10:30:00' as timestamp)), + months_between(cast('2002-03-24 10:30:00' as timestamp), cast('2002-02-24 00:00:00' as timestamp)); + +--test date format +select + months_between(cast('1995-02-02' as date), cast('1995-01-01' as date)), + months_between(cast('2003-07-17' as date), cast('2005-07-06' as date)), + months_between(cast('2001-06-30' as date), cast('2000-05-31' as date)), + months_between(cast('2000-06-01' as date), cast('2004-07-01' as date)), + months_between(cast('2002-02-28' as date), cast('2002-03-01' as date)), + months_between(cast('2002-02-31' as date), cast('2002-03-01' as date)), + months_between(cast('2012-02-29' as date), cast('2012-03-01' as date)), + months_between(cast('2012-02-31' as date), cast('2012-03-01' as date)); + +--test misc with null +select + months_between(cast(null as string), '2012-03-01'), + months_between('2012-02-31', cast(null as timestamp)), + months_between(cast(null as timestamp), cast(null as date)), + months_between(cast(null as string), cast('2012-03-01 00:00:00' as timestamp)), + months_between(cast('2012-02-31 00:00:00' as timestamp), cast(null as string)), + months_between(cast(null as timestamp), cast('2012-03-01' as string)), + months_between(cast('2012-02-31' as date), cast(null as string)), + months_between('2012-02-10', cast(null as string)), + months_between(cast(null as string), '2012-02-10'), + 
months_between(cast(null as string), cast(null as string)), + months_between('2012-02-10', cast(null as timestamp)), + months_between(cast(null as timestamp), '2012-02-10'), + months_between(cast(null as timestamp), cast(null as timestamp)), + -- string dates without day should be parsed to null + months_between('2012-03', '2012-02-24'), + months_between('2012-03-24', '2012-02'); Index: ql/src/test/queries/clientpositive/update_all_types.q =================================================================== --- ql/src/test/queries/clientpositive/update_all_types.q (revision 1673556) +++ ql/src/test/queries/clientpositive/update_all_types.q (working copy) @@ -5,6 +5,7 @@ create table acid_uat(ti tinyint, si smallint, i int, + j int, bi bigint, f float, d double, @@ -20,6 +21,7 @@ select ctinyint, csmallint, cint, + cint j, cbigint, cfloat, cdouble, @@ -37,7 +39,7 @@ update acid_uat set ti = 1, si = 2, - i = 3, + j = 3, bi = 4, f = 3.14, d = 6.28, Index: ql/src/test/queries/clientpositive/update_tmp_table.q =================================================================== --- ql/src/test/queries/clientpositive/update_tmp_table.q (revision 1673556) +++ ql/src/test/queries/clientpositive/update_tmp_table.q (working copy) @@ -8,7 +8,7 @@ select a,b from acid_utt order by a; -update acid_utt set b = 'fred' where b = '0ruyd6Y50JpdGRf6HqD'; +update acid_utt set a = 'fred' where b = '0ruyd6Y50JpdGRf6HqD'; select * from acid_utt order by a; Index: ql/src/test/queries/clientpositive/vectorized_parquet_types.q =================================================================== --- ql/src/test/queries/clientpositive/vectorized_parquet_types.q (revision 0) +++ ql/src/test/queries/clientpositive/vectorized_parquet_types.q (working copy) @@ -0,0 +1,82 @@ +SET hive.vectorized.execution.enabled=true; + +DROP TABLE parquet_types_staging; +DROP TABLE parquet_types; + +-- init +CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 array, + st1 struct, + d date, + cdecimal decimal(4,2) +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':'; + +CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary binary, + cdecimal decimal(4,2) +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging; + +INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), cdecimal FROM parquet_types_staging; + +-- select +explain +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types; + +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types; + +explain +SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; + +SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; + +explain +SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint; + +SELECT ctinyint, + 
MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint; \ No newline at end of file Index: ql/src/test/results/clientnegative/alter_concatenate_indexed_table.q.out =================================================================== --- ql/src/test/results/clientnegative/alter_concatenate_indexed_table.q.out (revision 1673556) +++ ql/src/test/results/clientnegative/alter_concatenate_indexed_table.q.out (working copy) @@ -76,5 +76,5 @@ PREHOOK: type: SHOWINDEXES POSTHOOK: query: show indexes on src_rc_concatenate_test POSTHOOK: type: SHOWINDEXES -src_rc_concatenate_test_index src_rc_concatenate_test key default.default__src_rc_concatenate_test_src_rc_concatenate_test_index__ compact +src_rc_concatenate_test_index src_rc_concatenate_test key default__src_rc_concatenate_test_src_rc_concatenate_test_index__ compact FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: can not do merge because source table default.src_rc_concatenate_test is indexed. Index: ql/src/test/results/clientnegative/alter_table_wrong_regex.q.out =================================================================== --- ql/src/test/results/clientnegative/alter_table_wrong_regex.q.out (revision 1673556) +++ ql/src/test/results/clientnegative/alter_table_wrong_regex.q.out (working copy) @@ -18,6 +18,4 @@ PREHOOK: type: ALTERTABLE_SERDEPROPERTIES PREHOOK: Input: default@aa PREHOOK: Output: default@aa -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Unclosed character class near index 7 -[^\](.*) - ^ +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. at least one column must be specified for the table Index: ql/src/test/results/clientnegative/authorization_update_noupdatepriv.q.out =================================================================== --- ql/src/test/results/clientnegative/authorization_update_noupdatepriv.q.out (revision 1673556) +++ ql/src/test/results/clientnegative/authorization_update_noupdatepriv.q.out (working copy) @@ -1,10 +1,10 @@ PREHOOK: query: -- check update without update priv -create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +create table auth_noupd(i int, j int) clustered by (j) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@auth_noupd POSTHOOK: query: -- check update without update priv -create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +create table auth_noupd(i int, j int) clustered by (j) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@auth_noupd Index: ql/src/test/results/clientnegative/ivyDownload.q.out =================================================================== --- ql/src/test/results/clientnegative/ivyDownload.q.out (revision 0) +++ ql/src/test/results/clientnegative/ivyDownload.q.out (working copy) @@ -0,0 +1,5 @@ +PREHOOK: query: CREATE TEMPORARY FUNCTION example_add AS 'UDFExampleAdd' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: example_add +FAILED: Class UDFExampleAdd not found +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.FunctionTask Index: ql/src/test/results/clientnegative/update_bucket_col.q.out 
=================================================================== --- ql/src/test/results/clientnegative/update_bucket_col.q.out (revision 0) +++ ql/src/test/results/clientnegative/update_bucket_col.q.out (working copy) @@ -0,0 +1,9 @@ +PREHOOK: query: create table foo(a int, b varchar(128)) partitioned by (ds string) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create table foo(a int, b varchar(128)) partitioned by (ds string) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +FAILED: SemanticException [Error 10302]: Updating values of bucketing columns is not supported. Column a. Index: ql/src/test/results/clientpositive/alter_concatenate_indexed_table.q.out =================================================================== --- ql/src/test/results/clientpositive/alter_concatenate_indexed_table.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/alter_concatenate_indexed_table.q.out (working copy) @@ -76,7 +76,7 @@ PREHOOK: type: SHOWINDEXES POSTHOOK: query: show indexes on src_rc_concatenate_test POSTHOOK: type: SHOWINDEXES -src_rc_concatenate_test_index src_rc_concatenate_test key default.default__src_rc_concatenate_test_src_rc_concatenate_test_index__ compact +src_rc_concatenate_test_index src_rc_concatenate_test key default__src_rc_concatenate_test_src_rc_concatenate_test_index__ compact PREHOOK: query: alter table src_rc_concatenate_test concatenate PREHOOK: type: ALTER_TABLE_MERGE PREHOOK: Input: default@src_rc_concatenate_test @@ -215,7 +215,7 @@ PREHOOK: type: SHOWINDEXES POSTHOOK: query: show indexes on src_rc_concatenate_test_part POSTHOOK: type: SHOWINDEXES -src_rc_concatenate_test_part_index src_rc_concatenate_test_part key default.default__src_rc_concatenate_test_part_src_rc_concatenate_test_part_index__ compact +src_rc_concatenate_test_part_index src_rc_concatenate_test_part key default__src_rc_concatenate_test_part_src_rc_concatenate_test_part_index__ compact PREHOOK: query: alter table src_rc_concatenate_test_part partition (ds='2011') concatenate PREHOOK: type: ALTER_PARTITION_MERGE PREHOOK: Input: default@src_rc_concatenate_test_part Index: ql/src/test/results/clientpositive/alter_partition_coltype.q.out =================================================================== --- ql/src/test/results/clientpositive/alter_partition_coltype.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/alter_partition_coltype.q.out (working copy) @@ -568,7 +568,7 @@ name: default.alter_coltype name: default.alter_coltype Truncated Path -> Alias: - /alter_coltype/dt=100/ts=3.0 [$hdt$_0:$hdt$_0:alter_coltype] + /alter_coltype/dt=100/ts=3.0 [$hdt$_0:alter_coltype] Needs Tagging: false Reduce Operator Tree: Group By Operator Index: ql/src/test/results/clientpositive/annotate_stats_part.q.out =================================================================== --- ql/src/test/results/clientpositive/annotate_stats_part.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out (working copy) @@ -481,106 +481,70 @@ explain select locid from loc_orc where locid>0 and year='2001' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator 
Tree: - TableScan - alias: loc_orc - Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (locid > 0) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: locid (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: loc_orc + Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: explain select locid,year from loc_orc where locid>0 and year='2001' PREHOOK: type: QUERY POSTHOOK: query: explain select locid,year from loc_orc where locid>0 and year='2001' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: loc_orc - Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (locid > 0) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: locid (type: int), '2001' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: loc_orc + Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int), '2001' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + ListSink PREHOOK: query: explain select * from (select locid,year from loc_orc) test where locid>0 and year='2001' PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select locid,year from loc_orc) test where locid>0 and year='2001' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: loc_orc - Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (locid > 0) (type: 
boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: locid (type: int), '2001' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: loc_orc + Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int), '2001' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + ListSink Index: ql/src/test/results/clientpositive/annotate_stats_select.q.out =================================================================== --- ql/src/test/results/clientpositive/annotate_stats_select.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out (working copy) @@ -395,7 +395,7 @@ alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( 'hello' AS CHAR(5) (type: char(5)) + expressions: 'hello' (type: char(5)) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE ListSink @@ -416,7 +416,7 @@ alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CAST( 'hello' AS varchar(5)) (type: varchar(5)) + expressions: 'hello' (type: varchar(5)) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE ListSink @@ -556,10 +556,10 @@ Select Operator expressions: array(1,2,3) (type: array) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -591,10 +591,10 @@ Select Operator expressions: str_to_map('a=1 b=2 c=3',' ','=') (type: map) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 1840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Index: ql/src/test/results/clientpositive/authorization_update.q.out 
=================================================================== --- ql/src/test/results/clientpositive/authorization_update.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/authorization_update.q.out (working copy) @@ -1,12 +1,12 @@ PREHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) -CREATE TABLE t_auth_up(i int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +CREATE TABLE t_auth_up(i int, j int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t_auth_up POSTHOOK: query: -- current user has been set (comment line before the set cmd is resulting in parse error!!) -CREATE TABLE t_auth_up(i int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +CREATE TABLE t_auth_up(i int, j int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t_auth_up @@ -52,11 +52,11 @@ default t_auth_up user1 USER UPDATE true -1 user1 default t_auth_up userWIns USER SELECT false -1 user1 default t_auth_up userWIns USER UPDATE false -1 user1 -PREHOOK: query: update t_auth_up set i = 0 where i > 0 +PREHOOK: query: update t_auth_up set j = 0 where i > 0 PREHOOK: type: QUERY PREHOOK: Input: default@t_auth_up PREHOOK: Output: default@t_auth_up -POSTHOOK: query: update t_auth_up set i = 0 where i > 0 +POSTHOOK: query: update t_auth_up set j = 0 where i > 0 POSTHOOK: type: QUERY POSTHOOK: Input: default@t_auth_up POSTHOOK: Output: default@t_auth_up Index: ql/src/test/results/clientpositive/authorization_update_own_table.q.out =================================================================== --- ql/src/test/results/clientpositive/authorization_update_own_table.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/authorization_update_own_table.q.out (working copy) @@ -1,16 +1,16 @@ -PREHOOK: query: create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: query: create table auth_noupd(i int, j int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@auth_noupd -POSTHOOK: query: create table auth_noupd(i int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: query: create table auth_noupd(i int, j int) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@auth_noupd -PREHOOK: query: update auth_noupd set i = 0 where i > 0 +PREHOOK: query: update auth_noupd set j = 0 where i > 0 PREHOOK: type: QUERY PREHOOK: Input: default@auth_noupd PREHOOK: Output: default@auth_noupd -POSTHOOK: query: update auth_noupd set i = 0 where i > 0 +POSTHOOK: query: update auth_noupd set j = 0 where i > 0 POSTHOOK: type: QUERY POSTHOOK: Input: default@auth_noupd POSTHOOK: Output: default@auth_noupd Index: ql/src/test/results/clientpositive/auto_join1.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join1.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/auto_join1.q.out (working copy) @@ -24,11 +24,11 @@ Stage: Stage-5 
Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:src1 + $hdt$_0:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:src1 + $hdt$_0:src1 TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/auto_join10.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join10.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/auto_join10.q.out (working copy) @@ -23,11 +23,11 @@ Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:src + $hdt$_0:$hdt$_0:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:src + $hdt$_0:$hdt$_0:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/auto_join11.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join11.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/auto_join11.q.out (working copy) @@ -23,11 +23,11 @@ Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:src + $hdt$_0:$hdt$_0:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:src + $hdt$_0:$hdt$_0:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/auto_join12.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join12.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/auto_join12.q.out (working copy) @@ -29,14 +29,14 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:src + $hdt$_0:$hdt$_0:$hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:src + $hdt$_0:$hdt$_0:$hdt$_0:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -51,7 +51,7 @@ keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/auto_join13.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join13.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/auto_join13.q.out (working copy) @@ -29,14 +29,14 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:src + $hdt$_0:$hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:src + $hdt$_0:$hdt$_0:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -51,7 +51,7 @@ keys: 0 UDFToDouble(_col0) (type: double) 1 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - $hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/auto_join14.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join14.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/auto_join14.q.out (working copy) @@ -28,11 +28,11 @@ Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src + $hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src + $hdt$_1:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/auto_join22.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join22.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/auto_join22.q.out (working copy) @@ -13,14 +13,14 @@ Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:src4 + $hdt$_0:$hdt$_0:$hdt$_0:src4 Fetch Operator limit: -1 - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:src4 + $hdt$_0:$hdt$_1:src4 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:src4 + $hdt$_0:$hdt$_0:$hdt$_0:src4 TableScan alias: src4 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -35,7 +35,7 @@ keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:src4 + $hdt$_0:$hdt$_1:src4 TableScan alias: src4 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/auto_join26.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join26.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/auto_join26.q.out (working copy) @@ -28,11 +28,11 @@ Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_1:x + $hdt$_0:$hdt$_1:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_1:x + $hdt$_0:$hdt$_1:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/auto_join_nulls.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join_nulls.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/auto_join_nulls.q.out (working copy) @@ -34,7 +34,7 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 13630578 -Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[17][bigTable=?] 
in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 Index: ql/src/test/results/clientpositive/auto_join_without_localtask.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join_without_localtask.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/auto_join_without_localtask.q.out (working copy) @@ -270,11 +270,11 @@ Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:a + $hdt$_1:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:a + $hdt$_1:a TableScan alias: a Filter Operator @@ -319,11 +319,11 @@ Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_2:a + $hdt$_2:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_2:a + $hdt$_2:a TableScan alias: a Filter Operator @@ -406,11 +406,11 @@ Local Work: Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$INTNAME + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME + $INTNAME TableScan Stage: Stage-2 @@ -457,11 +457,11 @@ Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:a + $hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:a + $hdt$_0:a TableScan alias: a Filter Operator @@ -639,11 +639,11 @@ Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_2:a + $hdt$_1:$hdt$_2:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_2:a + $hdt$_1:$hdt$_2:a TableScan alias: a Filter Operator @@ -714,11 +714,11 @@ Local Work: Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$INTNAME + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME + $INTNAME TableScan Stage: Stage-2 @@ -748,11 +748,11 @@ Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:a + $hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:a + $hdt$_0:a TableScan alias: a Filter Operator @@ -836,11 +836,11 @@ Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_1:a + $hdt$_1:$hdt$_1:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_1:a + $hdt$_1:$hdt$_1:a TableScan alias: a Filter Operator Index: ql/src/test/results/clientpositive/avro_add_column.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_add_column.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_add_column.q.out (working copy) @@ -24,8 +24,8 @@ POSTHOOK: query: DESCRIBE doctors POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors -number int from deserializer -first_name string from deserializer +number int +first_name string PREHOOK: query: ALTER TABLE doctors ADD COLUMNS (last_name string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@doctors @@ -40,9 +40,9 @@ POSTHOOK: query: DESCRIBE doctors POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors -number int from deserializer -first_name string from deserializer -last_name string from deserializer +number int +first_name string +last_name string PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors PREHOOK: type: LOAD #### A masked pattern was here #### 
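-- Editor's sketch (not part of the patch): the avro_add_column*.q.out and avro_comments.q.out updates above
-- encode a behaviour change where DESCRIBE on an Avro-backed table reports real column types and comments
-- instead of the generic "from deserializer" placeholder, including after ALTER TABLE ... ADD COLUMNS.
-- The table and column names below are hypothetical illustrations only, chosen to mirror the doctors tests.
CREATE TABLE avro_demo (
  number int COMMENT 'Order of playing the role',
  first_name string)
STORED AS AVRO;

DESCRIBE avro_demo;
-- expected: number int  Order of playing the role
--           first_name string

ALTER TABLE avro_demo ADD COLUMNS (last_name string COMMENT 'last name of actor playing role');

DESCRIBE avro_demo;
-- the added column appears with its comment, and no "from deserializer" suffix on any column

DROP TABLE avro_demo;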
Index: ql/src/test/results/clientpositive/avro_add_column2.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_add_column2.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_add_column2.q.out (working copy) @@ -50,8 +50,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@doctors POSTHOOK: Output: default@doctors_copy -POSTHOOK: Lineage: doctors_copy.first_name SIMPLE [(doctors)doctors.FieldSchema(name:first_name, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: doctors_copy.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: doctors_copy.first_name SIMPLE [(doctors)doctors.FieldSchema(name:first_name, type:string, comment:), ] +POSTHOOK: Lineage: doctors_copy.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:), ] PREHOOK: query: ALTER TABLE doctors_copy ADD COLUMNS (last_name string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@doctors_copy @@ -74,9 +74,9 @@ POSTHOOK: query: DESCRIBE doctors_copy POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors_copy -number int from deserializer -first_name string from deserializer -last_name string from deserializer +number int +first_name string +last_name string PREHOOK: query: SELECT * FROM doctors_copy PREHOOK: type: QUERY PREHOOK: Input: default@doctors_copy Index: ql/src/test/results/clientpositive/avro_add_column3.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_add_column3.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_add_column3.q.out (working copy) @@ -52,8 +52,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@doctors POSTHOOK: Output: default@doctors_copy@part=1 -POSTHOOK: Lineage: doctors_copy PARTITION(part=1).first_name SIMPLE [(doctors)doctors.FieldSchema(name:first_name, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: doctors_copy PARTITION(part=1).number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: doctors_copy PARTITION(part=1).first_name SIMPLE [(doctors)doctors.FieldSchema(name:first_name, type:string, comment:), ] +POSTHOOK: Lineage: doctors_copy PARTITION(part=1).number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:), ] PREHOOK: query: ALTER TABLE doctors_copy ADD COLUMNS (last_name string) PREHOOK: type: ALTERTABLE_ADDCOLS PREHOOK: Input: default@doctors_copy @@ -68,9 +68,9 @@ POSTHOOK: query: DESCRIBE doctors_copy POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors_copy -number int from deserializer -first_name string from deserializer -last_name string from deserializer +number int +first_name string +last_name string part int # Partition Information Index: ql/src/test/results/clientpositive/avro_change_schema.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_change_schema.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_change_schema.q.out (working copy) @@ -38,8 +38,8 @@ POSTHOOK: query: DESCRIBE avro2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@avro2 -string1 string from deserializer -string2 string from deserializer +string1 string +string2 string PREHOOK: query: ALTER TABLE avro2 SET TBLPROPERTIES ('avro.schema.literal'='{ "namespace": "org.apache.hive", "name": "second_schema", "type": "record", @@ -68,6 +68,6 @@ POSTHOOK: 
query: DESCRIBE avro2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@avro2 -int1 int from deserializer -float1 float from deserializer -double1 double from deserializer +int1 int +float1 float +double1 double Index: ql/src/test/results/clientpositive/avro_comments.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_comments.q.out (revision 0) +++ ql/src/test/results/clientpositive/avro_comments.q.out (working copy) @@ -0,0 +1,398 @@ +PREHOOK: query: -- verify Avro columns comments +DROP TABLE IF EXISTS testAvroComments1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- verify Avro columns comments +DROP TABLE IF EXISTS testAvroComments1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE testAvroComments1 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int", + "doc":"Order of playing the role" + }, + { + "name":"first_name", + "type":"string", + "doc":"first name of actor playing role" + }, + { + "name":"last_name", + "type":"string", + "doc":"last name of actor playing role" + }, + { + "name":"extra_field", + "type":"string", + "doc":"an extra field not in the original file", + "default":"fishfingers and custard" + } + ] +}') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testAvroComments1 +POSTHOOK: query: CREATE TABLE testAvroComments1 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int", + "doc":"Order of playing the role" + }, + { + "name":"first_name", + "type":"string", + "doc":"first name of actor playing role" + }, + { + "name":"last_name", + "type":"string", + "doc":"last name of actor playing role" + }, + { + "name":"extra_field", + "type":"string", + "doc":"an extra field not in the original file", + "default":"fishfingers and custard" + } + ] +}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testAvroComments1 +PREHOOK: query: DESCRIBE testAvroComments1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@testavrocomments1 +POSTHOOK: query: DESCRIBE testAvroComments1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@testavrocomments1 +number int Order of playing the role +first_name string first name of actor playing role +last_name string last name of actor playing role +extra_field string an extra field not in the original file +PREHOOK: query: DROP TABLE testAvroComments1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testavrocomments1 +PREHOOK: Output: default@testavrocomments1 +POSTHOOK: query: DROP TABLE testAvroComments1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testavrocomments1 +POSTHOOK: Output: default@testavrocomments1 +PREHOOK: query: DROP TABLE IF EXISTS testAvroComments2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS testAvroComments2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE 
TABLE testAvroComments2 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int", + "doc":"Order of playing the role" + }, + { + "name":"first_name", + "type":"string" + }, + { + "name":"last_name", + "type":"string", + "doc":"last name of actor playing role" + }, + { + "name":"extra_field", + "type":"string", + "default":"fishfingers and custard" + } + ] +}') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testAvroComments2 +POSTHOOK: query: CREATE TABLE testAvroComments2 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int", + "doc":"Order of playing the role" + }, + { + "name":"first_name", + "type":"string" + }, + { + "name":"last_name", + "type":"string", + "doc":"last name of actor playing role" + }, + { + "name":"extra_field", + "type":"string", + "default":"fishfingers and custard" + } + ] +}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testAvroComments2 +PREHOOK: query: DESCRIBE testAvroComments2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@testavrocomments2 +POSTHOOK: query: DESCRIBE testAvroComments2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@testavrocomments2 +number int Order of playing the role +first_name string +last_name string last name of actor playing role +extra_field string +PREHOOK: query: DROP TABLE testAvroComments2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testavrocomments2 +PREHOOK: Output: default@testavrocomments2 +POSTHOOK: query: DROP TABLE testAvroComments2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testavrocomments2 +POSTHOOK: Output: default@testavrocomments2 +PREHOOK: query: DROP TABLE IF EXISTS testAvroComments3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS testAvroComments3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE testAvroComments3 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int" + }, + { + "name":"first_name", + "type":"string" + }, + { + "name":"last_name", + "type":"string" + }, + { + "name":"extra_field", + "type":"string", + "default":"fishfingers and custard" + } + ] +}') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testAvroComments3 +POSTHOOK: query: CREATE TABLE testAvroComments3 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES 
('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int" + }, + { + "name":"first_name", + "type":"string" + }, + { + "name":"last_name", + "type":"string" + }, + { + "name":"extra_field", + "type":"string", + "default":"fishfingers and custard" + } + ] +}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testAvroComments3 +PREHOOK: query: DESCRIBE testAvroComments3 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@testavrocomments3 +POSTHOOK: query: DESCRIBE testAvroComments3 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@testavrocomments3 +number int +first_name string +last_name string +extra_field string +PREHOOK: query: DROP TABLE testAvroComments3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testavrocomments3 +PREHOOK: Output: default@testavrocomments3 +POSTHOOK: query: DROP TABLE testAvroComments3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testavrocomments3 +POSTHOOK: Output: default@testavrocomments3 +PREHOOK: query: DROP TABLE IF EXISTS testAvroComments4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS testAvroComments4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE testAvroComments4 ( + number int COMMENT "Order of playing the role", + first_name string COMMENT "first name of actor playing role", + last_name string COMMENT "last name of actor playing role", + extra_field string COMMENT "an extra field not in the original file") +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testAvroComments4 +POSTHOOK: query: CREATE TABLE testAvroComments4 ( + number int COMMENT "Order of playing the role", + first_name string COMMENT "first name of actor playing role", + last_name string COMMENT "last name of actor playing role", + extra_field string COMMENT "an extra field not in the original file") +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testAvroComments4 +PREHOOK: query: DESCRIBE testAvroComments4 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@testavrocomments4 +POSTHOOK: query: DESCRIBE testAvroComments4 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@testavrocomments4 +number int Order of playing the role +first_name string first name of actor playing role +last_name string last name of actor playing role +extra_field string an extra field not in the original file +PREHOOK: query: DROP TABLE testAvroComments4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testavrocomments4 +PREHOOK: Output: default@testavrocomments4 +POSTHOOK: query: DROP TABLE testAvroComments4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testavrocomments4 +POSTHOOK: Output: default@testavrocomments4 +PREHOOK: query: DROP TABLE IF EXISTS testAvroComments5 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS testAvroComments5 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE testAvroComments5 ( + number int COMMENT "Order of playing the role", + first_name string, + last_name string COMMENT "last name of actor playing role", + extra_field string) +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testAvroComments5 +POSTHOOK: query: CREATE TABLE testAvroComments5 ( + number int COMMENT "Order of playing the role", + first_name string, + last_name string COMMENT "last name of actor playing role", + extra_field 
string) +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testAvroComments5 +PREHOOK: query: DESCRIBE testAvroComments5 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@testavrocomments5 +POSTHOOK: query: DESCRIBE testAvroComments5 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@testavrocomments5 +number int Order of playing the role +first_name string +last_name string last name of actor playing role +extra_field string +PREHOOK: query: DROP TABLE testAvroComments5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testavrocomments5 +PREHOOK: Output: default@testavrocomments5 +POSTHOOK: query: DROP TABLE testAvroComments5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testavrocomments5 +POSTHOOK: Output: default@testavrocomments5 +PREHOOK: query: DROP TABLE IF EXISTS testAvroComments6 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS testAvroComments6 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE testAvroComments6 ( + number int, + first_name string, + last_name string, + extra_field string) +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testAvroComments6 +POSTHOOK: query: CREATE TABLE testAvroComments6 ( + number int, + first_name string, + last_name string, + extra_field string) +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testAvroComments6 +PREHOOK: query: DESCRIBE testAvroComments6 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@testavrocomments6 +POSTHOOK: query: DESCRIBE testAvroComments6 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@testavrocomments6 +number int +first_name string +last_name string +extra_field string +PREHOOK: query: DROP TABLE testAvroComments6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testavrocomments6 +PREHOOK: Output: default@testavrocomments6 +POSTHOOK: query: DROP TABLE testAvroComments6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testavrocomments6 +POSTHOOK: Output: default@testavrocomments6 Index: ql/src/test/results/clientpositive/avro_compression_enabled.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_compression_enabled.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_compression_enabled.q.out (working copy) @@ -29,7 +29,7 @@ { "name":"extra_field", "type":"string", - "doc:":"an extra field not in the original file", + "doc":"an extra field not in the original file", "default":"fishfingers and custard" } ] @@ -68,7 +68,7 @@ { "name":"extra_field", "type":"string", - "doc:":"an extra field not in the original file", + "doc":"an extra field not in the original file", "default":"fishfingers and custard" } ] Index: ql/src/test/results/clientpositive/avro_decimal.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_decimal.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_decimal.q.out (working copy) @@ -79,8 +79,8 @@ POSTHOOK: query: DESC avro_dec POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@avro_dec -name string from deserializer -value decimal(5,2) from deserializer +name string +value decimal(5,2) PREHOOK: query: INSERT OVERWRITE TABLE avro_dec select name, value from dec PREHOOK: type: QUERY PREHOOK: Input: default@dec @@ -153,8 +153,8 @@ POSTHOOK: query: DESC avro_dec1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@avro_dec1 -name 
string from deserializer -value decimal(4,1) from deserializer +name string +value decimal(4,1) PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.avro' into TABLE avro_dec1 PREHOOK: type: LOAD #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/avro_decimal_native.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_decimal_native.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_decimal_native.q.out (working copy) @@ -65,8 +65,8 @@ POSTHOOK: query: DESC avro_dec POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@avro_dec -name string from deserializer -value decimal(5,2) from deserializer +name string +value decimal(5,2) PREHOOK: query: INSERT OVERWRITE TABLE avro_dec SELECT name, value FROM dec PREHOOK: type: QUERY PREHOOK: Input: default@dec @@ -121,8 +121,8 @@ POSTHOOK: query: DESC avro_dec1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@avro_dec1 -name string from deserializer -value decimal(4,1) from deserializer +name string +value decimal(4,1) PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.avro' INTO TABLE avro_dec1 PREHOOK: type: LOAD #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/avro_evolved_schemas.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_evolved_schemas.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_evolved_schemas.q.out (working copy) @@ -30,7 +30,7 @@ { "name":"extra_field", "type":"string", - "doc:":"an extra field not in the original file", + "doc":"an extra field not in the original file", "default":"fishfingers and custard" } ] @@ -70,7 +70,7 @@ { "name":"extra_field", "type":"string", - "doc:":"an extra field not in the original file", + "doc":"an extra field not in the original file", "default":"fishfingers and custard" } ] @@ -84,10 +84,10 @@ POSTHOOK: query: DESCRIBE doctors_with_new_field POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors_with_new_field -number int from deserializer -first_name string from deserializer -last_name string from deserializer -extra_field string from deserializer +number int Order of playing the role +first_name string first name of actor playing role +last_name string last name of actor playing role +extra_field string an extra field not in the original file PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors_with_new_field PREHOOK: type: LOAD #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/avro_joins.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_joins.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_joins.q.out (working copy) @@ -31,7 +31,7 @@ { "name":"extra_field", "type":"string", - "doc:":"an extra field not in the original file", + "doc":"an extra field not in the original file", "default":"fishfingers and custard" } ] @@ -72,7 +72,7 @@ { "name":"extra_field", "type":"string", - "doc:":"an extra field not in the original file", + "doc":"an extra field not in the original file", "default":"fishfingers and custard" } ] @@ -86,10 +86,10 @@ POSTHOOK: query: DESCRIBE doctors4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors4 -number int from deserializer -first_name string from deserializer -last_name string from deserializer -extra_field string from deserializer +number int Order of playing the 
role +first_name string first name of actor playing role +last_name string last name of actor playing role +extra_field string an extra field not in the original file PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4 PREHOOK: type: LOAD #### A masked pattern was here #### @@ -166,9 +166,9 @@ POSTHOOK: query: DESCRIBE episodes POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@episodes -title string from deserializer -air_date string from deserializer -doctor int from deserializer +title string episode title +air_date string initial date +doctor int main actor playing the Doctor in episode PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes PREHOOK: type: LOAD #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/avro_joins_native.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_joins_native.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_joins_native.q.out (working copy) @@ -28,9 +28,9 @@ POSTHOOK: query: DESCRIBE doctors4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors4 -number int from deserializer -first_name string from deserializer -last_name string from deserializer +number int Order of playing the role +first_name string first name of actor playing role +last_name string last name of actor playing role PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4 PREHOOK: type: LOAD #### A masked pattern was here #### @@ -61,9 +61,9 @@ POSTHOOK: query: DESCRIBE episodes POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@episodes -title string from deserializer -air_date string from deserializer -doctor int from deserializer +title string episode title +air_date string initial date +doctor int main actor playing the Doctor in episode PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes PREHOOK: type: LOAD #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/avro_native.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_native.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_native.q.out (working copy) @@ -26,9 +26,9 @@ POSTHOOK: query: DESCRIBE doctors POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors -number int from deserializer -first_name string from deserializer -last_name string from deserializer +number int +first_name string +last_name string PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors PREHOOK: type: LOAD #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/avro_partitioned.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_partitioned.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_partitioned.q.out (working copy) @@ -150,27 +150,27 @@ POSTHOOK: Output: default@episodes_partitioned@doctor_pt=5 POSTHOOK: Output: default@episodes_partitioned@doctor_pt=6 POSTHOOK: Output: default@episodes_partitioned@doctor_pt=9 -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from 
deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: 
episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] PREHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 PREHOOK: type: QUERY PREHOOK: Input: default@episodes_partitioned @@ -360,27 +360,27 @@ POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=5 POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=6 POSTHOOK: Output: 
default@episodes_partitioned_serdeproperties@doctor_pt=9 -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: 
episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: 
episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] PREHOOK: query: -- Evolve the table schema by adding new array field "cast_and_crew" ALTER TABLE episodes_partitioned_serdeproperties SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' Index: ql/src/test/results/clientpositive/avro_partitioned_native.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_partitioned_native.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_partitioned_native.q.out (working copy) @@ -60,27 +60,27 @@ POSTHOOK: Output: default@episodes_partitioned@doctor_pt=5 POSTHOOK: Output: default@episodes_partitioned@doctor_pt=6 POSTHOOK: Output: default@episodes_partitioned@doctor_pt=9 -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, 
comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial 
date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] PREHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 PREHOOK: type: QUERY PREHOOK: Input: default@episodes_partitioned Index: ql/src/test/results/clientpositive/avro_sanity_test.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_sanity_test.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_sanity_test.q.out (working copy) @@ -72,9 +72,9 @@ POSTHOOK: query: DESCRIBE doctors POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors -number int from deserializer -first_name string from deserializer -last_name string from deserializer +number int Order of playing the role +first_name string first name of actor playing role +last_name string last name of actor playing role PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors PREHOOK: type: LOAD #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_schema_evolution_native.q.out (working copy) @@ -60,27 +60,27 @@ POSTHOOK: Output: default@episodes_partitioned@doctor_pt=5 POSTHOOK: Output: default@episodes_partitioned@doctor_pt=6 POSTHOOK: Output: default@episodes_partitioned@doctor_pt=9 -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned 
PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] -POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE 
[(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:initial date), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:main actor playing the Doctor in episode), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:episode title), ] PREHOOK: query: ALTER TABLE episodes_partitioned SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' WITH Index: ql/src/test/results/clientpositive/avro_schema_literal.q.out =================================================================== --- ql/src/test/results/clientpositive/avro_schema_literal.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/avro_schema_literal.q.out (working copy) @@ -70,20 +70,20 @@ POSTHOOK: query: DESCRIBE avro1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@avro1 -string1 string from deserializer -int1 int from deserializer -tinyint1 int from deserializer -smallint1 int from deserializer -bigint1 bigint from deserializer -boolean1 boolean from deserializer -float1 float 
from deserializer -double1 double from deserializer -list1 array from deserializer -map1 map from deserializer -struct1 struct from deserializer -union1 uniontype from deserializer -enum1 string from deserializer -nullableint int from deserializer -bytes1 binary from deserializer -fixed1 binary from deserializer -dec1 decimal(5,2) from deserializer +string1 string +int1 int +tinyint1 int +smallint1 int +bigint1 bigint +boolean1 boolean +float1 float +double1 double +list1 array +map1 map +struct1 struct +union1 uniontype +enum1 string +nullableint int +bytes1 binary +fixed1 binary +dec1 decimal(5,2) Index: ql/src/test/results/clientpositive/columnstats_part_coltype.q.out =================================================================== --- ql/src/test/results/clientpositive/columnstats_part_coltype.q.out (revision 0) +++ ql/src/test/results/clientpositive/columnstats_part_coltype.q.out (working copy) @@ -0,0 +1,441 @@ +PREHOOK: query: -- Test type date, int, and string in partition column +drop table if exists partcolstats +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Test type date, int, and string in partition column +drop table if exists partcolstats +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table partcolstats (key int, value string) partitioned by (ds date, hr int, part string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partcolstats +POSTHOOK: query: create table partcolstats (key int, value string) partitioned by (ds date, hr int, part string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partcolstats +PREHOOK: query: insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') select key, value from src limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partA +POSTHOOK: query: insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') select key, value from src limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partA +POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=2,part=partA).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=2,part=partA).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') select key, value from src limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partB +POSTHOOK: query: insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') select key, value from src limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partB +POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=2,part=partB).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=2,part=partB).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') select key, value from src limit 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=3/part=partA +POSTHOOK: query: insert into 
partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') select key, value from src limit 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=3/part=partA +POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=3,part=partA).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=3,part=partA).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') select key, value from src limit 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partA +POSTHOOK: query: insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') select key, value from src limit 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partA +POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-03,hr=3,part=partA).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-03,hr=3,part=partA).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') select key, value from src limit 60 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partB +POSTHOOK: query: insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') select key, value from src limit 60 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partB +POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-03,hr=3,part=partB).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-03,hr=3,part=partB).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@partcolstats +PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA +#### A masked pattern was here #### +POSTHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partcolstats +POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA +#### A masked pattern was here #### +PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partA') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key int 27 484 0 18 from deserializer +PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partA') +POSTHOOK: type: DESCTABLE 
+POSTHOOK: Input: default@partcolstats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 18 6.8 7 from deserializer +PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type comment + +key int from deserializer +PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type comment + +value string from deserializer +PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@partcolstats +PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA +PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB +#### A masked pattern was here #### +POSTHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partcolstats +POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA +POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB +#### A masked pattern was here #### +PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key int 27 484 0 18 from deserializer +PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 18 6.8 7 from deserializer +PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type comment + +key int from deserializer +PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type comment + +value string from deserializer +PREHOOK: query: analyze table 
partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@partcolstats +PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA +PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB +PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA +#### A masked pattern was here #### +POSTHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partcolstats +POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA +POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB +POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA +#### A masked pattern was here #### +PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key int 27 495 0 28 from deserializer +PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 18 6.833333333333333 7 from deserializer +PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type comment + +key int from deserializer +PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type comment + +value string from deserializer +PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type comment + +key int from deserializer +PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type comment + +value string from deserializer +PREHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns +PREHOOK: 
type: QUERY +PREHOOK: Input: default@partcolstats +PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA +PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB +PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA +PREHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partA +PREHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partB +#### A masked pattern was here #### +POSTHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partcolstats +POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA +POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB +POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA +POSTHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partA +POSTHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partB +#### A masked pattern was here #### +PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key int 15 495 0 43 from deserializer +PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 34 6.825 7 from deserializer +PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key int 15 495 0 51 from deserializer +PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstats +POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstats +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 53 6.883333333333334 7 from deserializer +PREHOOK: query: drop table partcolstats +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partcolstats +PREHOOK: Output: default@partcolstats +POSTHOOK: query: drop table partcolstats +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partcolstats +POSTHOOK: Output: default@partcolstats +PREHOOK: query: -- Test type tinyint, smallint, and bigint in partition column +drop table if exists partcolstatsnum +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Test type tinyint, smallint, and bigint in partition column +drop table if exists partcolstatsnum +POSTHOOK: type: DROPTABLE 
+PREHOOK: query: create table partcolstatsnum (key int, value string) partitioned by (tint tinyint, sint smallint, bint bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partcolstatsnum +POSTHOOK: query: create table partcolstatsnum (key int, value string) partitioned by (tint tinyint, sint smallint, bint bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partcolstatsnum +PREHOOK: query: insert into partcolstatsnum partition (tint=100, sint=1000, bint=1000000) select key, value from src limit 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partcolstatsnum@tint=100/sint=1000/bint=1000000 +POSTHOOK: query: insert into partcolstatsnum partition (tint=100, sint=1000, bint=1000000) select key, value from src limit 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partcolstatsnum@tint=100/sint=1000/bint=1000000 +POSTHOOK: Lineage: partcolstatsnum PARTITION(tint=100,sint=1000,bint=1000000).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partcolstatsnum PARTITION(tint=100,sint=1000,bint=1000000).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table partcolstatsnum partition (tint=100, sint=1000, bint=1000000) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@partcolstatsnum +PREHOOK: Input: default@partcolstatsnum@tint=100/sint=1000/bint=1000000 +#### A masked pattern was here #### +POSTHOOK: query: analyze table partcolstatsnum partition (tint=100, sint=1000, bint=1000000) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partcolstatsnum +POSTHOOK: Input: default@partcolstatsnum@tint=100/sint=1000/bint=1000000 +#### A masked pattern was here #### +PREHOOK: query: describe formatted partcolstatsnum.value partition (tint=100, sint=1000, bint=1000000) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstatsnum +POSTHOOK: query: describe formatted partcolstatsnum.value partition (tint=100, sint=1000, bint=1000000) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstatsnum +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 18 6.833333333333333 7 from deserializer +PREHOOK: query: drop table partcolstatsnum +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partcolstatsnum +PREHOOK: Output: default@partcolstatsnum +POSTHOOK: query: drop table partcolstatsnum +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partcolstatsnum +POSTHOOK: Output: default@partcolstatsnum +PREHOOK: query: -- Test type decimal in partition column +drop table if exists partcolstatsdec +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Test type decimal in partition column +drop table if exists partcolstatsdec +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table partcolstatsdec (key int, value string) partitioned by (decpart decimal(8,4)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partcolstatsdec +POSTHOOK: query: create table partcolstatsdec (key int, value string) partitioned by (decpart decimal(8,4)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partcolstatsdec +PREHOOK: query: insert into partcolstatsdec partition (decpart='1000.0001') select key, value from src limit 30 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@src +PREHOOK: Output: default@partcolstatsdec@decpart=1000.0001 +POSTHOOK: query: insert into partcolstatsdec partition (decpart='1000.0001') select key, value from src limit 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partcolstatsdec@decpart=1000.0001 +POSTHOOK: Lineage: partcolstatsdec PARTITION(decpart=1000.0001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partcolstatsdec PARTITION(decpart=1000.0001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table partcolstatsdec partition (decpart='1000.0001') compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@partcolstatsdec +PREHOOK: Input: default@partcolstatsdec@decpart=1000.0001 +#### A masked pattern was here #### +POSTHOOK: query: analyze table partcolstatsdec partition (decpart='1000.0001') compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partcolstatsdec +POSTHOOK: Input: default@partcolstatsdec@decpart=1000.0001 +#### A masked pattern was here #### +PREHOOK: query: describe formatted partcolstatsdec.value partition (decpart='1000.0001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstatsdec +POSTHOOK: query: describe formatted partcolstatsdec.value partition (decpart='1000.0001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstatsdec +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 18 6.833333333333333 7 from deserializer +PREHOOK: query: drop table partcolstatsdec +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partcolstatsdec +PREHOOK: Output: default@partcolstatsdec +POSTHOOK: query: drop table partcolstatsdec +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partcolstatsdec +POSTHOOK: Output: default@partcolstatsdec +PREHOOK: query: -- Test type varchar and char in partition column +drop table if exists partcolstatschar +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Test type varchar and char in partition column +drop table if exists partcolstatschar +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table partcolstatschar (key int, value string) partitioned by (varpart varchar(5), charpart char(3)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partcolstatschar +POSTHOOK: query: create table partcolstatschar (key int, value string) partitioned by (varpart varchar(5), charpart char(3)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partcolstatschar +PREHOOK: query: insert into partcolstatschar partition (varpart='part1', charpart='aaa') select key, value from src limit 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partcolstatschar@varpart=part1/charpart=aaa +POSTHOOK: query: insert into partcolstatschar partition (varpart='part1', charpart='aaa') select key, value from src limit 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partcolstatschar@varpart=part1/charpart=aaa +POSTHOOK: Lineage: partcolstatschar PARTITION(varpart=part1,charpart=aaa).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partcolstatschar PARTITION(varpart=part1,charpart=aaa).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table partcolstatschar partition (varpart='part1', 
charpart='aaa') compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@partcolstatschar +PREHOOK: Input: default@partcolstatschar@varpart=part1/charpart=aaa +#### A masked pattern was here #### +POSTHOOK: query: analyze table partcolstatschar partition (varpart='part1', charpart='aaa') compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partcolstatschar +POSTHOOK: Input: default@partcolstatschar@varpart=part1/charpart=aaa +#### A masked pattern was here #### +PREHOOK: query: describe formatted partcolstatschar.value partition (varpart='part1', charpart='aaa') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@partcolstatschar +POSTHOOK: query: describe formatted partcolstatschar.value partition (varpart='part1', charpart='aaa') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@partcolstatschar +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +value string 0 18 6.833333333333333 7 from deserializer +PREHOOK: query: drop table partcolstatschar +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partcolstatschar +PREHOOK: Output: default@partcolstatschar +POSTHOOK: query: drop table partcolstatschar +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partcolstatschar +POSTHOOK: Output: default@partcolstatschar Index: ql/src/test/results/clientpositive/combine2.q.out =================================================================== --- ql/src/test/results/clientpositive/combine2.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/combine2.q.out (working copy) @@ -564,14 +564,14 @@ name: default.combine2 name: default.combine2 Truncated Path -> Alias: - /combine2/value=2010-04-21 09%3A45%3A00 [$hdt$_0:$hdt$_0:combine2] - /combine2/value=val_0 [$hdt$_0:$hdt$_0:combine2] - /combine2/value=val_2 [$hdt$_0:$hdt$_0:combine2] - /combine2/value=val_4 [$hdt$_0:$hdt$_0:combine2] - /combine2/value=val_5 [$hdt$_0:$hdt$_0:combine2] - /combine2/value=val_8 [$hdt$_0:$hdt$_0:combine2] - /combine2/value=val_9 [$hdt$_0:$hdt$_0:combine2] - /combine2/value=| [$hdt$_0:$hdt$_0:combine2] + /combine2/value=2010-04-21 09%3A45%3A00 [$hdt$_0:combine2] + /combine2/value=val_0 [$hdt$_0:combine2] + /combine2/value=val_2 [$hdt$_0:combine2] + /combine2/value=val_4 [$hdt$_0:combine2] + /combine2/value=val_5 [$hdt$_0:combine2] + /combine2/value=val_8 [$hdt$_0:combine2] + /combine2/value=val_9 [$hdt$_0:combine2] + /combine2/value=| [$hdt$_0:combine2] Needs Tagging: false Reduce Operator Tree: Group By Operator Index: ql/src/test/results/clientpositive/correlationoptimizer1.q.out =================================================================== --- ql/src/test/results/clientpositive/correlationoptimizer1.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/correlationoptimizer1.q.out (working copy) @@ -329,11 +329,11 @@ Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x + $hdt$_0:$hdt$_0:$hdt$_1:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x + $hdt$_0:$hdt$_0:$hdt$_1:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/correlationoptimizer12.q.out =================================================================== --- ql/src/test/results/clientpositive/correlationoptimizer12.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/correlationoptimizer12.q.out (working copy) @@ -27,16 +27,12 @@ 
TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: value (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) @@ -116,16 +112,12 @@ TableScan alias: y Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: value (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) Index: ql/src/test/results/clientpositive/correlationoptimizer3.q.out =================================================================== --- ql/src/test/results/clientpositive/correlationoptimizer3.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/correlationoptimizer3.q.out (working copy) @@ -504,14 +504,14 @@ Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x + $hdt$_0:$hdt$_0:$hdt$_1:x Fetch Operator limit: -1 - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x + $hdt$_0:$hdt$_0:$hdt$_1:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -526,7 +526,7 @@ keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1208,14 +1208,14 @@ Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x + $hdt$_0:$hdt$_0:$hdt$_1:x Fetch Operator limit: -1 - $hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x + $hdt$_0:$hdt$_0:$hdt$_1:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1230,7 +1230,7 @@ keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: 
NONE Index: ql/src/test/results/clientpositive/create_like.q.out =================================================================== --- ql/src/test/results/clientpositive/create_like.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/create_like.q.out (working copy) @@ -331,9 +331,9 @@ POSTHOOK: Input: default@doctors # col_name data_type comment -number int from deserializer -first_name string from deserializer -last_name string from deserializer +number int Order of playing the role +first_name string first name of actor playing role +last_name string last name of actor playing role # Detailed Table Information Database: default @@ -380,9 +380,9 @@ POSTHOOK: Input: default@doctors2 # col_name data_type comment -number int from deserializer -first_name string from deserializer -last_name string from deserializer +number int Order of playing the role +first_name string first name of actor playing role +last_name string last name of actor playing role # Detailed Table Information Database: default Index: ql/src/test/results/clientpositive/ctas_colname.q.out =================================================================== --- ql/src/test/results/clientpositive/ctas_colname.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/ctas_colname.q.out (working copy) @@ -174,15 +174,11 @@ TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -340,15 +336,11 @@ TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) Index: ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out =================================================================== --- ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out (working copy) @@ -65,9 +65,9 @@ POSTHOOK: Input: default@acid@ds=2008-04-08 #### A masked pattern was here #### 1001 -PREHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' +PREHOOK: query: explain update acid set value = 'bar' where 
key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' +POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -81,18 +81,18 @@ TableScan alias: acid Filter Operator - predicate: (value = 'bar') (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator - expressions: ROW__ID (type: struct), 'foo' (type: string) - outputColumnNames: _col0, _col1 + expressions: ROW__ID (type: struct), 'bar' (type: string) + outputColumnNames: _col0, _col2 Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - value expressions: _col1 (type: string) + value expressions: _col2 (type: string) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), 'bar' (type: string), '2008-04-08' (type: string) + expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), VALUE._col1 (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false @@ -117,12 +117,12 @@ Stage: Stage-2 Stats-Aggr Operator -PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' +PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08 PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' +POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08 @@ -138,9 +138,9 @@ POSTHOOK: Input: default@acid@ds=2008-04-08 #### A masked pattern was here #### 1001 -PREHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08') +PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08') +POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -154,18 +154,18 @@ TableScan alias: acid Filter Operator - predicate: (value = 'bar') (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator - expressions: ROW__ID (type: struct), 'foo' (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col3 + expressions: ROW__ID (type: struct), 'bar' (type: string), ds (type: string) + outputColumnNames: _col0, _col2, _col3 Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - value expressions: _col1 (type: string), _col3 (type: string) + value expressions: _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), 'bar' (type: string), VALUE._col2 (type: string) + expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false @@ -190,12 +190,12 @@ Stage: 
Stage-2 Stats-Aggr Operator -PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08') +PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08 PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08') +POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08 @@ -303,9 +303,9 @@ POSTHOOK: Input: default@acid@ds=2008-04-08 #### A masked pattern was here #### 1001 -PREHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' +PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' +POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -319,18 +319,18 @@ TableScan alias: acid Filter Operator - predicate: (value = 'bar') (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator - expressions: ROW__ID (type: struct), 'foo' (type: string) - outputColumnNames: _col0, _col1 + expressions: ROW__ID (type: struct), 'bar' (type: string) + outputColumnNames: _col0, _col2 Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - value expressions: _col1 (type: string) + value expressions: _col2 (type: string) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), 'bar' (type: string), '2008-04-08' (type: string) + expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), VALUE._col1 (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false @@ -355,12 +355,12 @@ Stage: Stage-2 Stats-Aggr Operator -PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' +PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08 PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' +POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08 @@ -376,9 +376,9 @@ POSTHOOK: Input: default@acid@ds=2008-04-08 #### A masked pattern was here #### 1001 -PREHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08') +PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08') +POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -392,18 +392,18 @@ TableScan alias: acid Filter Operator - predicate: (value = 'bar') (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator - expressions: ROW__ID (type: 
struct), 'foo' (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col3 + expressions: ROW__ID (type: struct), 'bar' (type: string), ds (type: string) + outputColumnNames: _col0, _col2, _col3 Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - value expressions: _col1 (type: string), _col3 (type: string) + value expressions: _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), 'bar' (type: string), VALUE._col2 (type: string) + expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false @@ -428,12 +428,12 @@ Stage: Stage-2 Stats-Aggr Operator -PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08') +PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08 PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: update acid set key = 'foo' where value = 'bar' and ds in ('2008-04-08') +POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08 @@ -547,9 +547,9 @@ POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 +PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 +POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -563,18 +563,18 @@ TableScan alias: acid Filter Operator - predicate: (value = 'bar') (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator - expressions: ROW__ID (type: struct), 'foo' (type: string) - outputColumnNames: _col0, _col1 + expressions: ROW__ID (type: struct), 'bar' (type: string) + outputColumnNames: _col0, _col2 Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - value expressions: _col1 (type: string) + value expressions: _col2 (type: string) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int) + expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), VALUE._col1 (type: string), '2008-04-08' (type: string), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false @@ -600,12 +600,12 @@ Stage: Stage-2 Stats-Aggr Operator -PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 +PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: update acid set key = 'foo' where 
value = 'bar' and ds='2008-04-08' and hr=11 +POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 @@ -621,9 +621,9 @@ POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -637,18 +637,18 @@ TableScan alias: acid Filter Operator - predicate: (value = 'bar') (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator - expressions: ROW__ID (type: struct), 'foo' (type: string), hr (type: int) - outputColumnNames: _col0, _col1, _col4 + expressions: ROW__ID (type: struct), 'bar' (type: string), hr (type: int) + outputColumnNames: _col0, _col2, _col4 Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - value expressions: _col1 (type: string), _col4 (type: int) + value expressions: _col2 (type: string), _col4 (type: int) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col3 (type: int) + expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), VALUE._col1 (type: string), '2008-04-08' (type: string), VALUE._col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false @@ -674,14 +674,14 @@ Stage: Stage-2 Stats-Aggr Operator -PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 PREHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 @@ -799,9 +799,9 @@ POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 +PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 +POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -815,18 +815,18 @@ TableScan alias: acid Filter Operator - predicate: (value = 'bar') (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator - expressions: ROW__ID (type: 
struct), 'foo' (type: string) - outputColumnNames: _col0, _col1 + expressions: ROW__ID (type: struct), 'bar' (type: string) + outputColumnNames: _col0, _col2 Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - value expressions: _col1 (type: string) + value expressions: _col2 (type: string) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int) + expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), VALUE._col1 (type: string), '2008-04-08' (type: string), 11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false @@ -852,12 +852,12 @@ Stage: Stage-2 Stats-Aggr Operator -PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 +PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 +POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 @@ -873,9 +873,9 @@ POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -889,18 +889,18 @@ TableScan alias: acid Filter Operator - predicate: (value = 'bar') (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator - expressions: ROW__ID (type: struct), 'foo' (type: string), hr (type: int) - outputColumnNames: _col0, _col1, _col4 + expressions: ROW__ID (type: struct), 'bar' (type: string), hr (type: int) + outputColumnNames: _col0, _col2, _col4 Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - value expressions: _col1 (type: string), _col4 (type: int) + value expressions: _col2 (type: string), _col4 (type: int) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col3 (type: int) + expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), VALUE._col1 (type: string), '2008-04-08' (type: string), VALUE._col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false @@ -926,14 +926,14 @@ Stage: Stage-2 Stats-Aggr Operator -PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 PREHOOK: 
Input: default@acid@ds=2008-04-08/hr=12 PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 PREHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 @@ -1051,9 +1051,9 @@ POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 +PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 +POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1067,9 +1067,9 @@ TableScan alias: acid Filter Operator - predicate: (value = 'bar') (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator - expressions: ROW__ID (type: struct), 'foo' (type: string), value (type: string), ds (type: string), hr (type: int) + expressions: ROW__ID (type: struct), key (type: string), 'bar' (type: string), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Reduce Output Operator key expressions: _col0 (type: struct) @@ -1104,12 +1104,12 @@ Stage: Stage-2 Stats-Aggr Operator -PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 +PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 +POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 @@ -1125,9 +1125,9 @@ POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1141,9 +1141,9 @@ TableScan alias: acid Filter Operator - predicate: (value = 'bar') (type: boolean) + predicate: (key = 'foo') (type: boolean) Select Operator - expressions: ROW__ID (type: struct), 'foo' (type: string), value (type: string), ds (type: string), hr (type: int) + expressions: ROW__ID (type: struct), key (type: string), 'bar' (type: string), ds (type: string), hr (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Reduce Output Operator key expressions: _col0 (type: struct) @@ -1178,14 +1178,14 @@ Stage: Stage-2 Stats-Aggr Operator -PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +PREHOOK: query: 
update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 PREHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 +POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 Index: ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out =================================================================== --- ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out (working copy) @@ -338,7 +338,7 @@ name: default.src name: default.src Truncated Path -> Alias: - /src [$hdt$_0:src] + /src [src] Needs Tagging: false Reduce Operator Tree: Select Operator Index: ql/src/test/results/clientpositive/explain_logical.q.out =================================================================== --- ql/src/test/results/clientpositive/explain_logical.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/explain_logical.q.out (working copy) @@ -100,7 +100,7 @@ LOGICAL PLAN: -$hdt$_0:$hdt$_0:srcpart +$hdt$_0:srcpart TableScan (TS_0) alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE @@ -108,25 +108,25 @@ expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Group By Operator (GBY_5) + Group By Operator (GBY_4) aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator (RS_6) + Reduce Output Operator (RS_5) key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Group By Operator (GBY_7) + Group By Operator (GBY_6) aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator (FS_9) + File Output Operator (FS_8) compressed: false Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE table: @@ -180,7 +180,7 @@ LOGICAL PLAN: -$hdt$_0:$hdt$_0:src +$hdt$_0:src TableScan (TS_0) alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -188,25 +188,25 @@ expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator (GBY_4) + Group By Operator (GBY_3) aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator (RS_5) + Reduce Output Operator (RS_4) key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Group By Operator (GBY_6) + Group By Operator (GBY_5) aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator (FS_8) + File Output Operator (FS_7) compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: @@ -286,16 +286,16 @@ expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Union (UNION_6) + Union (UNION_5) Statistics: Num rows: 2500 Data size: 26560 Basic stats: COMPLETE Column stats: NONE - File Output Operator (FS_8) + File Output Operator (FS_7) compressed: false Statistics: Num rows: 2500 Data size: 26560 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -null-subquery2:$hdt$_0-subquery2:$hdt$_0:srcpart +null-subquery2:$hdt$_0-subquery2:srcpart TableScan (TS_2) alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE @@ -303,7 +303,7 @@ expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Union (UNION_6) + Union (UNION_5) Statistics: Num rows: 2500 Data size: 26560 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: EXPLAIN LOGICAL @@ -357,11 +357,11 @@ LOGICAL PLAN: -$hdt$_0:$hdt$_0:s2 +$hdt$_0:s2 TableScan (TS_0) alias: s2 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_13) + Filter Operator (FIL_12) predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_2) @@ -386,18 +386,18 @@ expressions: _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator (FS_12) + File Output Operator (FS_11) compressed: false Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -$hdt$_0:$hdt$_1:s1 +$hdt$_1:s1 TableScan (TS_3) alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_14) + Filter Operator (FIL_13) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_4) @@ -472,7 +472,7 @@ LOGICAL PLAN: -$hdt$_0:srcpart +srcpart TableScan (TS_0) alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE @@ -480,7 +480,7 @@ expressions: ds (type: string), key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - ListSink (OP_6) + ListSink (OP_5) PREHOOK: query: EXPLAIN LOGICAL SELECT * FROM V3 PREHOOK: type: QUERY @@ -503,23 +503,23 @@ LOGICAL PLAN: 
-$hdt$_0:$hdt$_0:srcpart +$hdt$_0:srcpart TableScan (TS_0) alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_13) + Filter Operator (FIL_12) predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_2) expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator (RS_7) + Reduce Output Operator (RS_6) key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Join Operator (JOIN_10) + Join Operator (JOIN_9) condition map: Inner Join 0 to 1 keys: @@ -527,11 +527,11 @@ 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator (SEL_11) + Select Operator (SEL_10) expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator (FS_12) + File Output Operator (FS_11) compressed: false Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE table: @@ -539,23 +539,23 @@ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe $hdt$_1:src2 - TableScan (TS_4) + TableScan (TS_3) alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_14) + Filter Operator (FIL_13) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator (SEL_5) + Select Operator (SEL_4) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator (RS_9) + Reduce Output Operator (RS_8) key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Join Operator (JOIN_10) + Join Operator (JOIN_9) condition map: Inner Join 0 to 1 keys: @@ -585,24 +585,24 @@ LOGICAL PLAN: -$hdt$_0:$hdt$_0:$hdt$_0:srcpart +$hdt$_0:srcpart TableScan (TS_0) alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_18) + Filter Operator (FIL_16) predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_2) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator (RS_9) + Reduce Output Operator (RS_8) key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Join Operator (JOIN_14) + Join Operator (JOIN_13) condition map: Inner Join 0 to 1 Inner Join 0 to 2 @@ -612,34 +612,34 @@ 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col4 
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator (SEL_15) + Select Operator (SEL_14) expressions: _col2 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - File Output Operator (FS_17) + File Output Operator (FS_15) compressed: false Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -$hdt$_0:$hdt$_1:src - TableScan (TS_4) +$hdt$_1:src + TableScan (TS_3) alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_19) + Filter Operator (FIL_17) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator (SEL_5) + Select Operator (SEL_4) expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator (RS_11) + Reduce Output Operator (RS_10) key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Join Operator (JOIN_14) + Join Operator (JOIN_13) condition map: Inner Join 0 to 1 Inner Join 0 to 2 @@ -649,24 +649,24 @@ 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col4 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE -$hdt$_0:$hdt$_2:src - TableScan (TS_6) +$hdt$_2:src + TableScan (TS_5) alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_20) + Filter Operator (FIL_18) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator (SEL_7) + Select Operator (SEL_6) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator (RS_13) + Reduce Output Operator (RS_12) key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Join Operator (JOIN_14) + Join Operator (JOIN_13) condition map: Inner Join 0 to 1 Inner Join 0 to 2 @@ -800,11 +800,11 @@ LOGICAL PLAN: -$hdt$_0:$hdt$_0:src +$hdt$_0:src TableScan (TS_0) alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_19) + Filter Operator (FIL_18) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_1) @@ -829,27 +829,27 @@ expressions: _col0 (type: string), _col3 (type: bigint), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator (RS_16) + Reduce Output Operator (RS_15) key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: 
_col1 (type: bigint), _col2 (type: string) - Select Operator (SEL_17) + Select Operator (SEL_16) expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator (FS_18) + File Output Operator (FS_17) compressed: false Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -$hdt$_0:$hdt$_1:$hdt$_1:src +$hdt$_1:$hdt$_1:src TableScan (TS_2) alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_20) + Filter Operator (FIL_19) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_3) Index: ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out =================================================================== --- ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out (revision 0) +++ ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out (working copy) @@ -0,0 +1,1385 @@ +PREHOOK: query: drop table if exists ext_loc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists ext_loc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table ext_loc ( + state string, + locid double, + cnt decimal, + zip int, + year string +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ext_loc +POSTHOOK: query: create table ext_loc ( + state string, + locid double, + cnt decimal, + zip int, + year string +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ext_loc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_partial_ndv.txt' OVERWRITE INTO TABLE ext_loc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@ext_loc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/extrapolate_stats_partial_ndv.txt' OVERWRITE INTO TABLE ext_loc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@ext_loc +PREHOOK: query: drop table if exists loc_orc_1d +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists loc_orc_1d +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table loc_orc_1d ( + state string, + locid double, + cnt decimal, + zip int +) partitioned by(year string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_orc_1d +POSTHOOK: query: create table loc_orc_1d ( + state string, + locid double, + cnt decimal, + zip int +) partitioned by(year string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_orc_1d +PREHOOK: query: insert overwrite table loc_orc_1d partition(year) select * from ext_loc +PREHOOK: type: QUERY +PREHOOK: Input: default@ext_loc +PREHOOK: Output: default@loc_orc_1d +POSTHOOK: query: insert overwrite table loc_orc_1d partition(year) select * from ext_loc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ext_loc +POSTHOOK: Output: 
default@loc_orc_1d@year=2000 +POSTHOOK: Output: default@loc_orc_1d@year=2001 +POSTHOOK: Output: default@loc_orc_1d@year=2002 +POSTHOOK: Output: default@loc_orc_1d@year=2003 +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2000).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2001).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2002).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2002).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2002).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2003).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2003).locid SIMPLE [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_1d PARTITION(year=2003).zip SIMPLE [(ext_loc)ext_loc.FieldSchema(name:zip, type:int, comment:null), ] +PREHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid,cnt,zip +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2001 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid,cnt,zip +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2001 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc_orc_1d partition(year='2002') compute statistics for columns state,locid,cnt,zip +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2002 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2002') compute statistics for columns state,locid,cnt,zip +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2002 +#### A masked pattern was here #### +PREHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe 
formatted loc_orc_1d.state PARTITION(year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 3 0.75 2 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 6 3.0 3 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 4.0 0 5 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 5.0 0 6 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 10 2000 0 5 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 10 910 0 4 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 4 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 4 from deserializer +PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + 
TOK_TABNAME + loc_orc_1d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_SELEXPR + TOK_TABLE_OR_COL + cnt + TOK_SELEXPR + TOK_TABLE_OR_COL + zip + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 2 + partition_columns year + partition_columns.types string + rawDataSize 416 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 536 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 4 + partition_columns year + partition_columns.types string + rawDataSize 832 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 570 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2002 + 
properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 6 + partition_columns year + partition_columns.types string + rawDataSize 1266 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 586 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2003 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 8 + partition_columns year + partition_columns.types string + rawDataSize 1672 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 610 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Processor Tree: + TableScan + alias: loc_orc_1d + Statistics: Num rows: 20 Data size: 4186 Basic stats: COMPLETE Column stats: PARTIAL + GatherStats: false + Select Operator + expressions: state (type: string), locid (type: double), cnt (type: decimal(10,0)), zip (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20 Data size: 4260 Basic stats: COMPLETE Column stats: PARTIAL + ListSink + +PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state,locid,cnt,zip +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2000 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2000') 
compute statistics for columns state,locid,cnt,zip +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2000 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc_orc_1d partition(year='2003') compute statistics for columns state,locid,cnt,zip +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_1d +PREHOOK: Input: default@loc_orc_1d@year=2003 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_1d partition(year='2003') compute statistics for columns state,locid,cnt,zip +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_1d +POSTHOOK: Input: default@loc_orc_1d@year=2003 +#### A masked pattern was here #### +PREHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2000') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2000') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 2 0.5 1 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2003') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.state PARTITION(year='2003') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 4 1.25 4 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2000') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2000') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 2.0 0 2 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2003') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.locid PARTITION(year='2003') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +locid double 1.0 31.0 0 6 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2000') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2000') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 1000 1010 0 3 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2003') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.cnt PARTITION(year='2003') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 1000 2000 0 3 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2000') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2000') +POSTHOOK: type: DESCTABLE 
+POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +zip int 94086 94087 0 2 from deserializer +PREHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2003') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_1d +POSTHOOK: query: describe formatted loc_orc_1d.zip PARTITION(year='2003') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_1d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +zip int 43201 94087 0 4 from deserializer +PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_1d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_1d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_SELEXPR + TOK_TABLE_OR_COL + cnt + TOK_SELEXPR + TOK_TABLE_OR_COL + zip + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 2 + partition_columns year + partition_columns.types string + rawDataSize 416 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 536 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 4 + partition_columns year + partition_columns.types string + rawDataSize 832 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 570 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input 
format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2002 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 6 + partition_columns year + partition_columns.types string + rawDataSize 1266 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 586 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2003 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + numFiles 1 + numRows 8 + partition_columns year + partition_columns.types string + rawDataSize 1672 + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 610 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt,zip + columns.comments + columns.types string:double:decimal(10,0):int +#### A masked pattern was here #### + name default.loc_orc_1d + partition_columns year + partition_columns.types string + serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked 
pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_1d + name: default.loc_orc_1d + Processor Tree: + TableScan + alias: loc_orc_1d + Statistics: Num rows: 20 Data size: 4186 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: state (type: string), locid (type: double), cnt (type: decimal(10,0)), zip (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20 Data size: 4220 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: drop table if exists loc_orc_2d +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists loc_orc_2d +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table loc_orc_2d ( + state string, + locid int, + cnt decimal +) partitioned by(zip int, year string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_orc_2d +POSTHOOK: query: create table loc_orc_2d ( + state string, + locid int, + cnt decimal +) partitioned by(zip int, year string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_orc_2d +PREHOOK: query: insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc +PREHOOK: type: QUERY +PREHOOK: Input: default@ext_loc +PREHOOK: Output: default@loc_orc_2d +POSTHOOK: query: insert overwrite table loc_orc_2d partition(zip, year) select * from ext_loc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ext_loc +POSTHOOK: Output: default@loc_orc_2d@zip=43201/year=2001 +POSTHOOK: Output: default@loc_orc_2d@zip=43201/year=2002 +POSTHOOK: Output: default@loc_orc_2d@zip=43201/year=2003 +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2000 +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2001 +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2002 +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2003 +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2000 +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2001 +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2002 +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2003 +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2001).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2001).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2002).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2002).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2003).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2003).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=43201,year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d 
PARTITION(zip=94086,year=2000).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2000).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2001).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2001).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2002).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2002).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2003).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2003).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94086,year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2000).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2000).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2000).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2001).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2001).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2001).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2002).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2002).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2002).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2003).cnt SIMPLE [(ext_loc)ext_loc.FieldSchema(name:cnt, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2003).locid EXPRESSION [(ext_loc)ext_loc.FieldSchema(name:locid, type:double, comment:null), ] +POSTHOOK: Lineage: loc_orc_2d PARTITION(zip=94087,year=2003).state SIMPLE [(ext_loc)ext_loc.FieldSchema(name:state, type:string, comment:null), ] 
+PREHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid,cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_2d +PREHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid,cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_2d +POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 +#### A masked pattern was here #### +PREHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2002') compute statistics for columns state,locid,cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc_2d +PREHOOK: Input: default@loc_orc_2d@zip=94087/year=2002 +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2002') compute statistics for columns state,locid,cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc_2d +POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2002 +#### A masked pattern was here #### +PREHOOK: query: describe formatted loc_orc_2d.state partition(zip=94086, year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_2d +POSTHOOK: query: describe formatted loc_orc_2d.state partition(zip=94086, year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_2d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 2 0.5 1 from deserializer +PREHOOK: query: describe formatted loc_orc_2d.state partition(zip=94087, year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_2d +POSTHOOK: query: describe formatted loc_orc_2d.state partition(zip=94087, year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_2d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +state string 0 4 3.0 3 from deserializer +PREHOOK: query: describe formatted loc_orc_2d.locid partition(zip=94086, year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_2d +POSTHOOK: query: describe formatted loc_orc_2d.locid partition(zip=94086, year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_2d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +locid int 2 3 0 2 from deserializer +PREHOOK: query: describe formatted loc_orc_2d.locid partition(zip=94087, year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_2d +POSTHOOK: query: describe formatted loc_orc_2d.locid partition(zip=94087, year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_2d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +locid int 1 5 0 3 from deserializer +PREHOOK: query: describe formatted loc_orc_2d.cnt partition(zip=94086, year='2001') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_2d +POSTHOOK: query: describe formatted loc_orc_2d.cnt partition(zip=94086, year='2001') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_2d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 1000 2000 0 2 from deserializer +PREHOOK: query: describe formatted loc_orc_2d.cnt partition(zip=94087, year='2002') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@loc_orc_2d +POSTHOOK: query: describe formatted 
loc_orc_2d.cnt partition(zip=94087, year='2002') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@loc_orc_2d +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +cnt decimal(10,0) 10 100 0 2 from deserializer +PREHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_2d +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select state,locid,cnt,zip from loc_orc_2d +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + loc_orc_2d + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + state + TOK_SELEXPR + TOK_TABLE_OR_COL + locid + TOK_SELEXPR + TOK_TABLE_OR_COL + cnt + TOK_SELEXPR + TOK_TABLE_OR_COL + zip + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + zip 43201 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 1 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 202 + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 393 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2002 + zip 43201 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 2 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 406 + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 415 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types 
int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2003 + zip 43201 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 3 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 603 + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 431 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + zip 94086 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 1 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 201 + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 391 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + zip 94086 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt + 
columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 2 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 400 + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 400 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2002 + zip 94086 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 1 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 203 + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 393 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2003 + zip 94086 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 2 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 404 + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 418 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2000 + zip 94087 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 1 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 200 + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 375 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + zip 94087 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 1 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 200 + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 368 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + 
Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2002 + zip 94087 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 3 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 609 + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 419 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2003 + zip 94087 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + numFiles 1 + numRows 3 + partition_columns zip/year + partition_columns.types int:string + rawDataSize 600 + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 422 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,cnt + columns.comments + columns.types string:int:decimal(10,0) +#### A masked pattern was here #### + name default.loc_orc_2d + partition_columns zip/year + partition_columns.types int:string + serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc_2d + name: default.loc_orc_2d + Processor Tree: + TableScan + alias: loc_orc_2d + Statistics: Num rows: 20 Data size: 4028 Basic stats: COMPLETE Column stats: PARTIAL + GatherStats: false + Select Operator + expressions: state (type: string), locid (type: int), cnt (type: decimal(10,0)), zip (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20 Data size: 4160 Basic stats: COMPLETE Column stats: PARTIAL + ListSink + Index: ql/src/test/results/clientpositive/groupby3_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby3_map.q.out 
(revision 1673556) +++ ql/src/test/results/clientpositive/groupby3_map.q.out (working copy) @@ -128,12 +128,32 @@ POSTHOOK: Lineage: dest1.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: query: SELECT +c1, +c2, +round(c3, 11) c3, +c4, +c5, +round(c6, 11) c6, +round(c7, 11) c7, +round(c8, 5) c8, +round(c9, 9) c9 +FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### -POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: query: SELECT +c1, +c2, +round(c3, 11) c3, +c4, +c5, +round(c6, 11) c6, +round(c7, 11) c7, +round(c8, 5) c8, +round(c9, 9) c9 +FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876 20469.01089779559 +130091.0 260.182 256.10355987055 98.0 0.0 142.92680950752 143.06995106519 20428.07288 20469.010897796 Index: ql/src/test/results/clientpositive/groupby_map_ppr.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_map_ppr.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/groupby_map_ppr.q.out (working copy) @@ -222,8 +222,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:src] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:src] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:src] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:src] Needs Tagging: false Reduce Operator Tree: Group By Operator Index: ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out (working copy) @@ -239,8 +239,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:src] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:src] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:src] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:src] Needs Tagging: false Reduce Operator Tree: Group By Operator Index: ql/src/test/results/clientpositive/groupby_ppr.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_ppr.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/groupby_ppr.q.out (working copy) @@ -215,8 +215,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:src] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:src] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:src] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:src] Needs Tagging: false Reduce Operator Tree: Group By Operator Index: ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out (working copy) @@ -228,8 +228,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: 
- /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:src] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:src] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:src] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:src] Needs Tagging: false Reduce Operator Tree: Group By Operator Index: ql/src/test/results/clientpositive/groupby_sort_1_23.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_1_23.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/groupby_sort_1_23.q.out (working copy) @@ -189,7 +189,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Stage: Stage-7 Conditional Operator @@ -526,7 +526,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -784,7 +784,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Stage: Stage-7 Conditional Operator @@ -1192,7 +1192,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Stage: Stage-7 Conditional Operator @@ -1483,8 +1483,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -1501,17 +1506,39 @@ Group By Operator aggregations: count(1) keys: 1 (type: int), _col1 (type: string) - mode: hash + mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1564,26 +1591,53 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 
[$hdt$_0:$hdt$_0:t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + /t1 [$hdt$_0:t1] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1601,15 +1655,98 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 + name: default.outputtbl3 + Truncated Path -> Alias: +#### A masked pattern was here #### 
- Stage: Stage-0 - Move Operator - tables: - replace: true + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - table: + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1625,9 +1762,14 @@ #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 + name: default.outputtbl3 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-2 - Stats-Aggr Operator + Stage: Stage-6 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 @@ -1790,7 +1932,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2011,7 +2153,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2278,7 +2420,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:$hdt$_0:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2626,7 +2768,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_0:t1, null-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_0:t1] + /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1, null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] Stage: Stage-7 Conditional Operator @@ -3051,7 +3193,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_0:t1] + /t1 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -3247,7 +3389,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_0:t1] + /t1 
[null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1] #### A masked pattern was here #### Stage: Stage-8 @@ -4305,7 +4447,7 @@ name: default.t2 name: default.t2 Truncated Path -> Alias: - /t2 [$hdt$_0:$hdt$_0:t2] + /t2 [$hdt$_0:t2] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -4453,8 +4595,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -4471,17 +4618,44 @@ Group By Operator aggregations: count(1) keys: _col0 (type: string), 1 (type: int), _col2 (type: string) - mode: hash + mode: final outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4534,26 +4708,58 @@ name: default.t2 name: default.t2 Truncated Path -> Alias: - /t2 [$hdt$_0:$hdt$_0:t2] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + /t2 [$hdt$_0:t2] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + 
files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4576,15 +4782,118 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + name: default.outputtbl4 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-0 - Move Operator - tables: - replace: true + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - table: + NumFilesPerFileSink: 1 + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -4605,9 +4914,14 @@ #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 + name: default.outputtbl4 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-2 - Stats-Aggr Operator + Stage: Stage-6 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 @@ -4696,8 +5010,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -4714,17 +5033,39 @@ Group By Operator aggregations: count(1) keys: _col0 (type: string), 1 (type: int), _col2 (type: string), 2 (type: int) - mode: hash + mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int) - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col4 (type: bigint) - auto parallelism: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 
12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl5 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4777,26 +5118,53 @@ name: default.t2 name: default.t2 Truncated Path -> Alias: - /t2 [$hdt$_0:$hdt$_0:t2] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), UDFToInteger(_col4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + /t2 [$hdt$_0:t2] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl5 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4814,15 +5182,98 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A 
masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl5 + name: default.outputtbl5 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-0 - Move Operator - tables: - replace: true + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - table: + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl5 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -4838,9 +5289,14 @@ #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 + name: default.outputtbl5 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-2 - Stats-Aggr Operator + 
Stage: Stage-6 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 @@ -4943,8 +5399,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -4961,17 +5422,44 @@ Group By Operator aggregations: count(1) keys: _col0 (type: string), 1 (type: int), _col2 (type: string) - mode: hash + mode: final outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5024,26 +5512,58 @@ name: default.t2 name: default.t2 Truncated Path -> Alias: - /t2 [$hdt$_0:$hdt$_0:$hdt$_0:t2] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + /t2 [$hdt$_0:t2] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true 
+#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5066,15 +5586,118 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + name: default.outputtbl4 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-0 - Move Operator - tables: - replace: true + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - table: + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -5095,9 +5718,14 @@ #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 + name: default.outputtbl4 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-2 - Stats-Aggr Operator + Stage: Stage-6 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 @@ -5229,8 +5857,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -5246,19 +5879,45 @@ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - bucketGroup: true keys: _col0 (type: string), 2 (type: int), _col2 (type: string) - mode: hash + mode: final outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked 
pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5311,26 +5970,58 @@ name: default.t2 name: default.t2 Truncated Path -> Alias: - /t2 [$hdt$_0:$hdt$_0:$hdt$_0:t2] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + /t2 [$hdt$_0:t2] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5353,15 +6044,118 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + 
Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + name: default.outputtbl4 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-0 - Move Operator - tables: - replace: true + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - table: + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -5382,9 +6176,14 @@ #### A masked 
pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 + name: default.outputtbl4 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-2 - Stats-Aggr Operator + Stage: Stage-6 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 Index: ql/src/test/results/clientpositive/groupby_sort_6.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_6.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/groupby_sort_6.q.out (working copy) @@ -425,7 +425,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1/ds=2 [$hdt$_0:$hdt$_0:t1] + /t1/ds=2 [$hdt$_0:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator Index: ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out (working copy) @@ -189,7 +189,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Stage: Stage-7 Conditional Operator @@ -527,7 +527,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -849,7 +849,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Stage: Stage-7 Conditional Operator @@ -1257,7 +1257,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Stage: Stage-7 Conditional Operator @@ -1548,9 +1548,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -1567,17 +1571,39 @@ Group By Operator aggregations: count(1) keys: 1 (type: int), _col1 (type: string) - mode: hash + mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1630,90 +1656,122 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: partials - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + /t1 [$hdt$_0:t1] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was 
here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10001 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2 - columns.types int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2 - columns.types int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 + name: default.outputtbl3 Truncated Path -> Alias: #### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1731,15 +1789,29 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true + Path -> 
Alias: #### A masked pattern was here #### - table: + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,cnt + columns.comments + columns.types int:int:int +#### A masked pattern was here #### + name default.outputtbl3 + serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1755,9 +1827,14 @@ #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 + name: default.outputtbl3 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-3 - Stats-Aggr Operator + Stage: Stage-6 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 @@ -1921,7 +1998,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2207,7 +2284,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2539,7 +2616,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [$hdt$_0:$hdt$_0:$hdt$_0:t1] + /t1 [$hdt$_0:$hdt$_0:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2951,7 +3028,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_0:t1, null-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_0:t1] + /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1, null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] Stage: Stage-7 Conditional Operator @@ -3377,7 +3454,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_0:t1] + /t1 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -3637,7 +3714,7 @@ name: default.t1 name: default.t1 Truncated Path -> Alias: - /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_0:t1] + /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1] #### A masked pattern was here #### Stage: Stage-8 @@ -4761,7 +4838,7 @@ name: default.t2 name: default.t2 Truncated Path -> Alias: - /t2 [$hdt$_0:$hdt$_0:t2] + /t2 [$hdt$_0:t2] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -4973,9 +5050,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -4992,17 +5073,44 @@ Group By Operator aggregations: count(1) keys: _col0 (type: string), 1 (type: int), _col2 (type: string) - mode: hash + mode: final outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 
Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5055,90 +5163,142 @@ name: default.t2 name: default.t2 Truncated Path -> Alias: - /t2 [$hdt$_0:$hdt$_0:t2] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) - mode: partials - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + /t2 [$hdt$_0:t2] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10001 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name 
default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + name: default.outputtbl4 Truncated Path -> Alias: #### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5161,15 +5321,34 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true + Path -> Alias: #### A masked pattern was here #### - table: + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -5190,9 +5369,14 @@ #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 + name: default.outputtbl4 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-3 - Stats-Aggr Operator + Stage: Stage-6 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 @@ -5281,9 +5465,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + 
Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -5300,17 +5488,39 @@ Group By Operator aggregations: count(1) keys: _col0 (type: string), 1 (type: int), _col2 (type: string), 2 (type: int) - mode: hash + mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int) - sort order: ++++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col4 (type: bigint) - auto parallelism: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl5 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5363,90 +5573,122 @@ name: default.t2 name: default.t2 Truncated Path -> Alias: - /t2 [$hdt$_0:$hdt$_0:t2] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: int) - mode: partials - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + /t2 [$hdt$_0:t2] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string,int,string,int,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
properties: + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl5 + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int) - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col4 (type: bigint) - auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl5 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10001 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string,int,string,int,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string,int,string,int,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl5 + name: default.outputtbl5 Truncated Path -> Alias: #### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: int) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), UDFToInteger(_col4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5464,15 +5706,29 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true + Path -> Alias: #### A masked pattern was here #### - table: + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,key2,key3,key4,cnt + columns.comments + columns.types int:int:string:int:int +#### A masked pattern was here #### + name default.outputtbl5 + serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -5488,9 +5744,14 @@ #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 + name: default.outputtbl5 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-3 - Stats-Aggr Operator + Stage: Stage-6 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 @@ -5593,9 +5854,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , 
consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -5612,17 +5877,44 @@ Group By Operator aggregations: count(1) keys: _col0 (type: string), 1 (type: int), _col2 (type: string) - mode: hash + mode: final outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5675,90 +5967,142 @@ name: default.t2 name: default.t2 Truncated Path -> Alias: - /t2 [$hdt$_0:$hdt$_0:$hdt$_0:t2] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) - mode: partials - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + /t2 [$hdt$_0:t2] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10001 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + name: default.outputtbl4 Truncated Path -> Alias: #### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -5781,15 +6125,34 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true + Path -> Alias: #### A masked pattern was here #### - table: + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -5810,9 +6173,14 @@ #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 + name: default.outputtbl4 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-3 - Stats-Aggr Operator + Stage: Stage-6 + Move Operator + files: + 
hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 @@ -5944,9 +6312,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -5962,19 +6334,45 @@ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - bucketGroup: true keys: _col0 (type: string), 2 (type: int), _col2 (type: string) - mode: hash + mode: final outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -6027,90 +6425,142 @@ name: default.t2 name: default.t2 Truncated Path -> Alias: - /t2 [$hdt$_0:$hdt$_0:$hdt$_0:t2] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) - mode: partials - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + /t2 [$hdt$_0:t2] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col3 (type: bigint) - auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10001 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col2,_col3 - columns.types string,int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl4 + name: default.outputtbl4 Truncated Path -> Alias: #### A masked pattern was here #### - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -6133,15 +6583,34 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 - GatherStats: true + GatherStats: false MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true + Path -> Alias: #### A masked pattern was here #### - table: + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10001 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key1,key2,key3,cnt + columns.comments + columns.types int:int:string:int +#### A masked pattern was here #### + name default.outputtbl4 + numFiles 1 + numRows 6 + rawDataSize 48 + serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 54 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -6162,9 +6631,14 @@ #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 + name: default.outputtbl4 + Truncated Path -> Alias: +#### A masked pattern was here #### - Stage: Stage-3 - Stats-Aggr Operator + Stage: Stage-6 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 Index: ql/src/test/results/clientpositive/index_auth.q.out =================================================================== --- ql/src/test/results/clientpositive/index_auth.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_auth.q.out (working copy) @@ -24,7 +24,7 @@ PREHOOK: type: SHOWINDEXES POSTHOOK: query: SHOW INDEXES ON foobar POSTHOOK: type: SHOWINDEXES -srcpart_auth_index foobar key default.default__foobar_srcpart_auth_index__ bitmap +srcpart_auth_index foobar key default__foobar_srcpart_auth_index__ bitmap PREHOOK: query: grant select on table foobar to user hive_test_user PREHOOK: type: GRANT_PRIVILEGE PREHOOK: Output: default@foobar Index: ql/src/test/results/clientpositive/index_auto.q.out =================================================================== --- ql/src/test/results/clientpositive/index_auto.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_auto.q.out (working copy) @@ -146,7 +146,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_src_index__ + alias: default__src_src_index__ filterExpr: ((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) (type: boolean) Filter Operator predicate: ((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) (type: boolean) Index: ql/src/test/results/clientpositive/index_auto_file_format.q.out =================================================================== --- ql/src/test/results/clientpositive/index_auto_file_format.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_auto_file_format.q.out (working copy) @@ -40,7 +40,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_src_index__ + alias: default__src_src_index__ filterExpr: (UDFToDouble(key) = 86.0) (type: boolean) Filter Operator predicate: (UDFToDouble(key) = 86.0) (type: boolean) @@ -156,7 +156,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_src_index__ + alias: default__src_src_index__ filterExpr: (UDFToDouble(key) = 86.0) (type: boolean) Filter Operator predicate: (UDFToDouble(key) = 86.0) (type: boolean) Index: ql/src/test/results/clientpositive/index_auto_mult_tables.q.out =================================================================== --- ql/src/test/results/clientpositive/index_auto_mult_tables.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_auto_mult_tables.q.out (working copy) @@ -211,7 +211,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__srcpart_srcpart_index__ + alias: default__srcpart_srcpart_index__ filterExpr: (((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Filter Operator predicate: (((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) @@ -308,7 +308,7 @@ Map Reduce Map Operator 
Tree: TableScan - alias: default.default__src_src_index__ + alias: default__src_src_index__ filterExpr: (((((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) < 90.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Filter Operator predicate: (((((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) < 90.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Index: ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out =================================================================== --- ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out (working copy) @@ -221,7 +221,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__srcpart_srcpart_index__ + alias: default__srcpart_srcpart_index__ filterExpr: ((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) Filter Operator predicate: ((((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (UDFToDouble(key) > 80.0)) and (UDFToDouble(key) < 100.0)) (type: boolean) @@ -339,7 +339,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_src_index__ + alias: default__src_src_index__ filterExpr: ((((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) < 90.0)) (type: boolean) Filter Operator predicate: ((((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and (UDFToDouble(key) > 70.0)) and (UDFToDouble(key) < 90.0)) (type: boolean) Index: ql/src/test/results/clientpositive/index_auto_multiple.q.out =================================================================== --- ql/src/test/results/clientpositive/index_auto_multiple.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_auto_multiple.q.out (working copy) @@ -60,7 +60,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_src_key_index__ + alias: default__src_src_key_index__ filterExpr: (UDFToDouble(key) = 86.0) (type: boolean) Filter Operator predicate: (UDFToDouble(key) = 86.0) (type: boolean) Index: ql/src/test/results/clientpositive/index_auto_partitioned.q.out =================================================================== --- ql/src/test/results/clientpositive/index_auto_partitioned.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_auto_partitioned.q.out (working copy) @@ -54,7 +54,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__srcpart_src_part_index__ + alias: default__srcpart_src_part_index__ filterExpr: ((UDFToDouble(key) = 86.0) and (ds = '2008-04-09')) (type: boolean) Filter Operator predicate: (UDFToDouble(key) = 86.0) (type: boolean) Index: ql/src/test/results/clientpositive/index_auto_self_join.q.out =================================================================== --- ql/src/test/results/clientpositive/index_auto_self_join.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_auto_self_join.q.out (working copy) @@ -131,7 +131,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_src_index__ + alias: default__src_src_index__ filterExpr: (((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Filter Operator predicate: (((UDFToDouble(key) > 80.0) and 
(UDFToDouble(key) < 100.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) @@ -229,7 +229,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_src_index__ + alias: default__src_src_index__ filterExpr: (((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Filter Operator predicate: (((UDFToDouble(key) > 70.0) and (UDFToDouble(key) < 90.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Index: ql/src/test/results/clientpositive/index_auto_update.q.out =================================================================== --- ql/src/test/results/clientpositive/index_auto_update.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_auto_update.q.out (working copy) @@ -219,7 +219,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__temp_temp_index__ + alias: default__temp_temp_index__ filterExpr: (UDFToDouble(key) = 86.0) (type: boolean) Filter Operator predicate: (UDFToDouble(key) = 86.0) (type: boolean) Index: ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out =================================================================== --- ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out (working copy) @@ -52,7 +52,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__srcpart_src_part_index__ + alias: default__srcpart_src_part_index__ filterExpr: ((UDFToDouble(key) = 86.0) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Filter Operator predicate: ((UDFToDouble(key) = 86.0) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Index: ql/src/test/results/clientpositive/index_bitmap_compression.q.out =================================================================== --- ql/src/test/results/clientpositive/index_bitmap_compression.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_bitmap_compression.q.out (working copy) @@ -38,7 +38,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_src_index__ + alias: default__src_src_index__ filterExpr: (((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Filter Operator predicate: (((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Index: ql/src/test/results/clientpositive/index_compression.q.out =================================================================== --- ql/src/test/results/clientpositive/index_compression.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_compression.q.out (working copy) @@ -42,7 +42,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_src_index__ + alias: default__src_src_index__ filterExpr: ((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) (type: boolean) Filter Operator predicate: ((UDFToDouble(key) > 80.0) and (UDFToDouble(key) < 100.0)) (type: boolean) Index: ql/src/test/results/clientpositive/index_serde.q.out =================================================================== --- ql/src/test/results/clientpositive/index_serde.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_serde.q.out (working copy) @@ -72,9 +72,9 @@ POSTHOOK: query: DESCRIBE doctors POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors -number int from deserializer -first_name string from deserializer -last_name string from deserializer 
+number int Order of playing the role +first_name string first name of actor playing role +last_name string last name of actor playing role PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors PREHOOK: type: LOAD #### A masked pattern was here #### @@ -98,7 +98,7 @@ POSTHOOK: query: DESCRIBE EXTENDED default__doctors_doctors_index__ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@default__doctors_doctors_index__ -number int from deserializer +number int Order of playing the role _bucketname string _offsets array @@ -113,7 +113,7 @@ POSTHOOK: Output: default@default__doctors_doctors_index__ POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] -POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:Order of playing the role), ] PREHOOK: query: EXPLAIN SELECT * FROM doctors WHERE number > 6 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM doctors WHERE number > 6 @@ -134,7 +134,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__doctors_doctors_index__ + alias: default__doctors_doctors_index__ filterExpr: (number > 6) (type: boolean) Filter Operator predicate: (number > 6) (type: boolean) Index: ql/src/test/results/clientpositive/index_skewtable.q.out =================================================================== --- ql/src/test/results/clientpositive/index_skewtable.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_skewtable.q.out (working copy) @@ -92,7 +92,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__kv_kv_index__ + alias: default__kv_kv_index__ filterExpr: (value > '15') (type: boolean) Filter Operator predicate: (value > '15') (type: boolean) Index: ql/src/test/results/clientpositive/index_stale_partitioned.q.out =================================================================== --- ql/src/test/results/clientpositive/index_stale_partitioned.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/index_stale_partitioned.q.out (working copy) @@ -80,37 +80,25 @@ POSTHOOK: query: EXPLAIN SELECT * FROM temp WHERE key = 86 AND foo = 'bar' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: temp - filterExpr: ((UDFToDouble(key) = 86.0) and (foo = 'bar')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) = 86.0) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '86' (type: string), val (type: string), 'bar' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: temp + filterExpr: ((UDFToDouble(key) = 86.0) and (foo = 'bar')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 86.0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '86' (type: string), val (type: string), 'bar' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: SELECT * FROM temp WHERE key = 86 AND foo = 'bar' PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/input42.q.out =================================================================== --- ql/src/test/results/clientpositive/input42.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/input42.q.out (working copy) @@ -1187,52 +1187,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (UDFToDouble(key) < 200.0) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1276,9 +1238,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart -#### A masked pattern was here #### Partition - base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1322,15 +1282,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a] - 
/srcpart/ds=2008-04-08/hr=12 [a] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (UDFToDouble(key) < 200.0) (type: boolean) + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select * from srcpart a where a.ds='2008-04-08' and key < 200 PREHOOK: type: QUERY @@ -1759,52 +1724,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (rand(100) < 0.1) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1848,9 +1775,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart -#### A masked pattern was here #### Partition - base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1894,15 +1819,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a] - /srcpart/ds=2008-04-08/hr=12 [a] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (rand(100) < 0.1) (type: boolean) + Statistics: 
Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select * from srcpart a where a.ds='2008-04-08' and rand(100) < 0.1 PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/input_part1.q.out =================================================================== --- ql/src/test/results/clientpositive/input_part1.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/input_part1.q.out (working copy) @@ -172,7 +172,7 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-7 Conditional Operator Index: ql/src/test/results/clientpositive/input_part9.q.out =================================================================== --- ql/src/test/results/clientpositive/input_part9.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/input_part9.q.out (working copy) @@ -38,52 +38,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -127,9 +89,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart -#### A masked pattern was here #### Partition - base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -173,15 +133,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: 
default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [x] - /srcpart/ds=2008-04-08/hr=12 [x] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: x + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: SELECT x.* FROM SRCPART x WHERE key IS NOT NULL AND ds = '2008-04-08' PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/interval_arithmetic.q.out =================================================================== --- ql/src/test/results/clientpositive/interval_arithmetic.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/interval_arithmetic.q.out (working copy) @@ -102,7 +102,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@interval_arithmetic_1 #### A masked pattern was here #### -1970-01-01 1967-11-01 1972-03-01 1972-03-01 1967-11-01 1967-11-01 1972-03-01 +1969-12-31 1967-10-31 1972-02-29 1972-02-29 1967-10-31 1967-10-31 1972-02-29 NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: explain select @@ -174,7 +174,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@interval_arithmetic_1 #### A masked pattern was here #### -1970-01-01 -10748 23:00:00.000000000 10748 23:00:00.000000000 0 00:00:00.000000000 +1969-12-31 -10749 23:00:00.000000000 10749 23:00:00.000000000 0 00:00:00.000000000 NULL NULL NULL NULL PREHOOK: query: explain select @@ -396,7 +396,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@interval_arithmetic_1 #### A masked pattern was here #### -1970-01-01 1969-09-23 13:37:26.876543211 1970-04-10 11:22:33.123456789 1970-04-10 11:22:33.123456789 1969-09-23 13:37:26.876543211 1969-09-23 13:37:26.876543211 1970-04-10 11:22:33.123456789 +1969-12-31 1969-09-22 13:37:26.876543211 1970-04-09 11:22:33.123456789 1970-04-09 11:22:33.123456789 1969-09-22 13:37:26.876543211 1969-09-22 13:37:26.876543211 1970-04-09 11:22:33.123456789 NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: explain select @@ -472,7 +472,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@interval_arithmetic_1 #### A masked pattern was here #### -1970-01-01 1969-12-31 15:59:46.674 0 08:00:12.326000000 -0 08:00:12.326000000 0 00:00:00.000000000 +1969-12-31 1969-12-31 15:59:46.674 -0 15:59:47.674000000 0 15:59:47.674000000 0 00:00:00.000000000 NULL NULL NULL NULL NULL PREHOOK: query: explain select Index: ql/src/test/results/clientpositive/interval_udf.q.out =================================================================== --- ql/src/test/results/clientpositive/interval_udf.q.out (revision 0) +++ ql/src/test/results/clientpositive/interval_udf.q.out (working copy) @@ -0,0 +1,19 @@ +PREHOOK: query: select + year(iym), month(iym), day(idt), hour(idt), minute(idt), second(idt) +from ( + select interval '1-2' year to month iym, interval '3 4:5:6.789' day to second idt + from src limit 1 +) q +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + year(iym), month(iym), day(idt), hour(idt), minute(idt), second(idt) +from ( + select interval '1-2' year to month iym, 
interval '3 4:5:6.789' day to second idt + from src limit 1 +) q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 2 3 4 5 6 Index: ql/src/test/results/clientpositive/ivyDownload.q.out =================================================================== --- ql/src/test/results/clientpositive/ivyDownload.q.out (revision 0) +++ ql/src/test/results/clientpositive/ivyDownload.q.out (working copy) @@ -0,0 +1,75 @@ +PREHOOK: query: CREATE TEMPORARY FUNCTION example_add AS 'UDFExampleAdd' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: example_add +POSTHOOK: query: CREATE TEMPORARY FUNCTION example_add AS 'UDFExampleAdd' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: example_add +PREHOOK: query: EXPLAIN +SELECT example_add(1, 2), + example_add(1, 2, 3), + example_add(1, 2, 3, 4), + example_add(1.1, 2.2), + example_add(1.1, 2.2, 3.3), + example_add(1.1, 2.2, 3.3, 4.4), + example_add(1, 2, 3, 4.4) +FROM src LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT example_add(1, 2), + example_add(1, 2, 3), + example_add(1, 2, 3, 4), + example_add(1.1, 2.2), + example_add(1.1, 2.2, 3.3), + example_add(1.1, 2.2, 3.3, 4.4), + example_add(1, 2, 3, 4.4) +FROM src LIMIT 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 3 (type: int), 6 (type: int), 10 (type: int), 3.3000000000000003 (type: double), 6.6 (type: double), 11.0 (type: double), 10.4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 22000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: SELECT example_add(1, 2), + example_add(1, 2, 3), + example_add(1, 2, 3, 4), + example_add(1.1, 2.2), + example_add(1.1, 2.2, 3.3), + example_add(1.1, 2.2, 3.3, 4.4), + example_add(1, 2, 3, 4.4) +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT example_add(1, 2), + example_add(1, 2, 3), + example_add(1, 2, 3, 4), + example_add(1.1, 2.2), + example_add(1.1, 2.2, 3.3), + example_add(1.1, 2.2, 3.3, 4.4), + example_add(1, 2, 3, 4.4) +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +3 6 10 3.3000000000000003 6.6 11.0 10.4 +PREHOOK: query: DROP TEMPORARY FUNCTION example_add +PREHOOK: type: DROPFUNCTION +PREHOOK: Output: example_add +POSTHOOK: query: DROP TEMPORARY FUNCTION example_add +POSTHOOK: type: DROPFUNCTION +POSTHOOK: Output: example_add Index: ql/src/test/results/clientpositive/join28.q.out =================================================================== --- ql/src/test/results/clientpositive/join28.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/join28.q.out (working copy) @@ -40,14 +40,14 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:z + $hdt$_0:z Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:z + $hdt$_0:z TableScan alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -62,7 +62,7 @@ keys: 0 _col0 (type: 
string) 1 _col0 (type: string) - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/join29.q.out =================================================================== --- ql/src/test/results/clientpositive/join29.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/join29.q.out (working copy) @@ -84,11 +84,11 @@ Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$INTNAME1 + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME1 + $INTNAME1 TableScan HashTable Sink Operator keys: @@ -135,11 +135,11 @@ Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$INTNAME + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME + $INTNAME TableScan HashTable Sink Operator keys: Index: ql/src/test/results/clientpositive/join31.q.out =================================================================== --- ql/src/test/results/clientpositive/join31.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/join31.q.out (working copy) @@ -84,11 +84,11 @@ Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$INTNAME1 + $hdt$_0:$INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$INTNAME1 + $hdt$_0:$INTNAME1 TableScan HashTable Sink Operator keys: @@ -169,11 +169,11 @@ Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$INTNAME + $hdt$_0:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$INTNAME + $hdt$_0:$INTNAME TableScan HashTable Sink Operator keys: Index: ql/src/test/results/clientpositive/join32.q.out =================================================================== --- ql/src/test/results/clientpositive/join32.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/join32.q.out (working copy) @@ -109,14 +109,14 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:y + $hdt$_0:y Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:y + $hdt$_0:y TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -134,7 +134,7 @@ 0 _col0 (type: string) 1 _col3 (type: string) Position of Big Table: 1 - $hdt$_0:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -356,7 +356,7 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_1:$hdt$_1:z] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] Stage: Stage-0 Move Operator Index: ql/src/test/results/clientpositive/join32_lessSize.q.out =================================================================== --- ql/src/test/results/clientpositive/join32_lessSize.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/join32_lessSize.q.out (working copy) @@ -119,11 +119,11 @@ Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -279,16 +279,16 @@ name: default.srcpart name: 
default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_1:$hdt$_1:z] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:y + $hdt$_0:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:y + $hdt$_0:y TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -666,11 +666,11 @@ Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_2:$hdt$_2:x + $hdt$_1:$hdt$_2:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_2:$hdt$_2:x + $hdt$_1:$hdt$_2:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -780,16 +780,16 @@ name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [$hdt$_0:$hdt$_1:$hdt$_2:$hdt$_3:x] + /src1 [$hdt$_1:$hdt$_2:$hdt$_3:x] Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_1:w + $hdt$_1:$hdt$_1:w Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_1:w + $hdt$_1:$hdt$_1:w TableScan alias: w Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -914,11 +914,11 @@ Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:w + $hdt$_0:w Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:w + $hdt$_0:w TableScan alias: w Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1319,11 +1319,11 @@ Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1481,12 +1481,12 @@ name: default.src1 name: default.src1 Truncated Path -> Alias: - /src [$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:y] + /src [$hdt$_1:$hdt$_1:y] Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:z + $hdt$_0:z Fetch Operator limit: -1 Partition Description: @@ -1536,7 +1536,7 @@ name: default.srcpart name: default.srcpart Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:z + $hdt$_0:z TableScan alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1931,11 +1931,11 @@ Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:y + $hdt$_0:$hdt$_0:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:y + $hdt$_0:$hdt$_0:y TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2089,7 +2089,7 @@ name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [$hdt$_0:$hdt$_0:$hdt$_1:x] + /src1 [$hdt$_0:$hdt$_1:x] Stage: Stage-8 Map Reduce Local Work @@ -2469,11 +2469,11 @@ Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -2526,11 +2526,11 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:x + $hdt$_0:x Fetch Operator limit: -1 Alias -> Map 
Local Operator Tree: - $hdt$_0:$hdt$_0:x + $hdt$_0:x TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2726,11 +2726,11 @@ Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -2783,11 +2783,11 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:y + $hdt$_0:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:y + $hdt$_0:y TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/join33.q.out =================================================================== --- ql/src/test/results/clientpositive/join33.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/join33.q.out (working copy) @@ -109,14 +109,14 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:y + $hdt$_0:y Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:y + $hdt$_0:y TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -134,7 +134,7 @@ 0 _col0 (type: string) 1 _col3 (type: string) Position of Big Table: 1 - $hdt$_0:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -356,7 +356,7 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_1:$hdt$_1:z] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] Stage: Stage-0 Move Operator Index: ql/src/test/results/clientpositive/join35.q.out =================================================================== --- ql/src/test/results/clientpositive/join35.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/join35.q.out (working copy) @@ -232,7 +232,7 @@ name: default.src name: default.src Truncated Path -> Alias: - /src [$hdt$_0-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_0:x] + /src [$hdt$_0-subquery1:$hdt$_0-subquery1:$hdt$_0:x] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -569,7 +569,7 @@ name: default.src name: default.src Truncated Path -> Alias: - /src [$hdt$_0-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_0:x] + /src [$hdt$_0-subquery2:$hdt$_0-subquery2:$hdt$_0:x] Needs Tagging: false Reduce Operator Tree: Group By Operator Index: ql/src/test/results/clientpositive/leadlag.q.out =================================================================== --- ql/src/test/results/clientpositive/leadlag.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/leadlag.q.out (working copy) @@ -223,6 +223,8 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 46 17 Manufacturer#5 almond azure blanched chiffon midnight 23 21 PREHOOK: query: -- 6. testRankInLead +-- disable cbo because of CALCITE-653 + select p_mfgr, p_name, p_size, r1, lead(r1,1,r1) over (distribute by p_mfgr sort by p_name) as deltaRank from ( @@ -234,6 +236,8 @@ PREHOOK: Input: default@part #### A masked pattern was here #### POSTHOOK: query: -- 6. 
testRankInLead +-- disable cbo because of CALCITE-653 + select p_mfgr, p_name, p_size, r1, lead(r1,1,r1) over (distribute by p_mfgr sort by p_name) as deltaRank from ( Index: ql/src/test/results/clientpositive/list_bucket_dml_1.q.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_1.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_dml_1.q.out (working copy) @@ -207,8 +207,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-0 Move Operator @@ -433,52 +433,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_dynamic_part - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '484') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: key=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -522,14 +484,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.list_bucketing_dynamic_part name: default.list_bucketing_dynamic_part - Truncated Path -> Alias: - /list_bucketing_dynamic_part/ds=2008-04-08/hr=11/key=484 [$hdt$_0:list_bucketing_dynamic_part] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: list_bucketing_dynamic_part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '484') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select key, value from list_bucketing_dynamic_part where 
ds='2008-04-08' and hr='11' and key = "484" PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.7.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.7.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.7.out (working copy) @@ -305,52 +305,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 'val_466') (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), 'val_466' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: value=val_466 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -394,14 +356,20 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part name: default.list_bucketing_static_part - Truncated Path -> Alias: - /list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_466 [$hdt$_0:list_bucketing_static_part] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: list_bucketing_static_part + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 'val_466') (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), 'val_466' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.7.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.7.out (revision 1673556) +++ 
ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.7.out (working copy) @@ -166,7 +166,7 @@ name: default.src name: default.src Truncated Path -> Alias: - /src [$hdt$_0:src] + /src [src] Stage: Stage-0 Move Operator @@ -318,52 +318,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_mul_col - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:string:string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: col4=val_466 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -407,14 +369,20 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_mul_col name: default.list_bucketing_mul_col - Truncated Path -> Alias: - /list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466 [list_bucketing_mul_col] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select * from list_bucketing_mul_col where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" @@ -476,52 +444,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 
depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_mul_col - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:string:string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -565,14 +495,20 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_mul_col name: default.list_bucketing_mul_col - Truncated Path -> Alias: - /list_bucketing_mul_col/ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [list_bucketing_mul_col] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select * from list_bucketing_mul_col where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" Index: ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out 
(working copy) @@ -166,7 +166,7 @@ name: default.src name: default.src Truncated Path -> Alias: - /src [$hdt$_0:src] + /src [src] Stage: Stage-0 Move Operator @@ -318,52 +318,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_mul_col - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:string:string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: col4=val_466 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -407,14 +369,20 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_mul_col name: default.list_bucketing_mul_col - Truncated Path -> Alias: - /list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466 [list_bucketing_mul_col] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select * from list_bucketing_mul_col where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" Index: ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.7.out 
=================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.7.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.7.out (working copy) @@ -231,8 +231,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-0 Move Operator @@ -404,52 +404,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: value=val_484 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -493,14 +455,20 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part name: default.list_bucketing_static_part - Truncated Path -> Alias: - /list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484 [list_bucketing_static_part] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: list_bucketing_static_part + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select * from list_bucketing_static_part where ds = 
'2008-04-08' and hr = '11' and key = '484' and value = 'val_484' PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/list_bucket_dml_3.q.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_3.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_dml_3.q.out (working copy) @@ -199,8 +199,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-0 Move Operator @@ -373,52 +373,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '484') (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: key=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -462,14 +424,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.list_bucketing_static_part name: default.list_bucketing_static_part - Truncated Path -> Alias: - /list_bucketing_static_part/ds=2008-04-08/hr=11/key=484 [$hdt$_0:list_bucketing_static_part] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: list_bucketing_static_part + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '484') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and key = "484" PREHOOK: type: QUERY Index: 
ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.7.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.7.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.7.out (working copy) @@ -239,8 +239,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-0 Move Operator @@ -534,8 +534,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-7 Conditional Operator @@ -815,52 +815,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: value=val_484 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -904,14 +866,20 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part name: default.list_bucketing_static_part - Truncated Path -> Alias: - /list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484 [list_bucketing_static_part] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: list_bucketing_static_part + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/list_bucket_dml_5.q.java1.7.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_5.q.java1.7.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_dml_5.q.java1.7.out (working copy) @@ -211,8 +211,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-0 Move Operator @@ -448,52 +448,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_dynamic_part - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '103') and (value = 'val_103')) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '103' (type: string), 'val_103' (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: value=val_103 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -537,9 +499,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.list_bucketing_dynamic_part name: default.list_bucketing_dynamic_part -#### A masked pattern was here #### Partition - base file name: value=val_103 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -583,15 +543,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.list_bucketing_dynamic_part name: 
default.list_bucketing_dynamic_part - Truncated Path -> Alias: - /list_bucketing_dynamic_part/ds=2008-04-08/hr=11/key=103/value=val_103 [list_bucketing_dynamic_part] - /list_bucketing_dynamic_part/ds=2008-04-08/hr=12/key=103/value=val_103 [list_bucketing_dynamic_part] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: list_bucketing_dynamic_part + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '103') and (value = 'val_103')) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '103' (type: string), 'val_103' (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.7.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.7.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.7.out (working copy) @@ -295,8 +295,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-0 Move Operator @@ -650,8 +650,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-7 Conditional Operator Index: ql/src/test/results/clientpositive/list_bucket_dml_7.q.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_7.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_dml_7.q.out (working copy) @@ -241,8 +241,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-0 Move Operator @@ -596,8 +596,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-7 Conditional Operator Index: ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.7.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.7.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.7.out (working copy) @@ -297,8 +297,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-0 Move 
Operator Index: ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.7.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.7.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.7.out (working copy) @@ -239,8 +239,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-0 Move Operator @@ -534,8 +534,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-7 Conditional Operator @@ -815,52 +815,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: key=484 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -904,14 +866,20 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part name: default.list_bucketing_static_part - Truncated Path -> Alias: - /list_bucketing_static_part/ds=2008-04-08/hr=11/key=484 [list_bucketing_static_part] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: list_bucketing_static_part + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out (working copy) @@ -156,52 +156,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: value=val_484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -245,14 +207,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_daily/ds=1/hr=4/key=484/value=val_484 [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (key='484' and value= 'val_484') @@ -316,52 +284,14 @@ 
STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '238') and (value = 'val_238')) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '238' (type: string), 'val_238' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: value=val_238 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -405,14 +335,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_daily/ds=1/hr=4/key=238/value=val_238 [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '238') and (value = 'val_238')) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '238' (type: string), 'val_238' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and (key='238' and value= 'val_238') @@ -469,52 +405,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = '3') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked 
pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -558,14 +456,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = '3') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (value = "3") @@ -623,52 +527,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '495') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '495' (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### 
+ Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -712,14 +578,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '495') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '495' (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and key = '369' Index: ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out (working copy) @@ -154,52 +154,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 'val_484') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), 'val_484' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -243,61 +205,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: 
default.fact_daily -#### A masked pattern was here #### - Partition - base file name: value=val_484 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 1 - hr 4 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.fact_daily - numFiles 3 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct fact_daily { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.fact_daily - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct fact_daily { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.fact_daily - name: default.fact_daily - Truncated Path -> Alias: - /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [$hdt$_0:fact_daily] - /fact_daily/ds=1/hr=4/key=484/value=val_484 [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 'val_484') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), 'val_484' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and value= 'val_484' @@ -353,52 +274,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '406') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '406' (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - 
columns.types string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -442,14 +325,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '406') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '406' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and key= '406' @@ -526,52 +415,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238'))) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: value=val_238 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -615,61 
+466,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily -#### A masked pattern was here #### - Partition - base file name: value=val_484 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 1 - hr 4 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.fact_daily - numFiles 3 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct fact_daily { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.fact_daily - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct fact_daily { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.fact_daily - name: default.fact_daily - Truncated Path -> Alias: - /fact_daily/ds=1/hr=4/key=238/value=val_238 [$hdt$_0:fact_daily] - /fact_daily/ds=1/hr=4/key=484/value=val_484 [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238'))) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query SELECT key, value FROM fact_daily WHERE ds='1' and hr='4' and ( (key='484' and value ='val_484') or (key='238' and value= 'val_238')) Index: ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out (working copy) @@ -272,52 +272,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '145') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '145' (type: string), value (type: string), '1' (type: string), '1' 
(type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: hr=1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -361,14 +323,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_daily/ds=1/hr=1 [fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '145') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '145' (type: string), value (type: string), '1' (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select * from fact_daily where ds = '1' and hr='1' and key='145' PREHOOK: type: QUERY @@ -489,7 +457,7 @@ name: default.fact_daily name: default.fact_daily Truncated Path -> Alias: - /fact_daily/ds=1/hr=1 [$hdt$_0:$hdt$_0:fact_daily] + /fact_daily/ds=1/hr=1 [$hdt$_0:fact_daily] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -581,52 +549,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '1' (type: string), '2' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: value=val_484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -670,14 +600,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_daily/ds=1/hr=2/key=484/value=val_484 [fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), '1' (type: string), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='2' and (key='484' and value='val_484') PREHOOK: type: QUERY @@ -735,52 +671,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '327') and (value = 'val_327')) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '327' (type: string), 'val_327' (type: string), '1' (type: string), '3' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + 
Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: value=val_327 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -824,14 +722,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_daily/ds=1/hr=3/key=327/value=val_327 [fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '327') and (value = 'val_327')) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '327' (type: string), 'val_327' (type: string), '1' (type: string), '3' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='3' and (key='327' and value='val_327') PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_query_oneskew_1.q.out (working copy) @@ -209,52 +209,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (x = 484) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 484 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: x=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -298,14 +260,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_tz/ds=1/x=484 [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch 
Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (x = 484) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 484 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query SELECT x FROM fact_daily WHERE ds='1' and x=484 @@ -356,52 +324,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (x = 495) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 495 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -445,14 +375,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_tz/ds=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (x = 495) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 495 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query SELECT x FROM fact_daily WHERE ds='1' and x=495 @@ -499,52 +435,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter 
Operator - isSamplingPred: false - predicate: (x = 1) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -588,14 +486,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_tz/ds=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (x = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: SELECT x FROM fact_daily WHERE ds='1' and x=1 PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_query_oneskew_2.q.out (working copy) @@ -226,52 +226,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (x = 484) (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 484 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: x=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -315,14 +277,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - /fact_tz/ds=1/x=484 [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (x = 484) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 484 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query select x from (select * from fact_daily where ds = '1') subq where x = 484 @@ -393,52 +361,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (x = 484) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 484 (type: int), y (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types int:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: x=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -482,14 +412,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily - Truncated Path -> Alias: - 
/fact_tz/ds=1/x=484 [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (x = 484) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 484 (type: int), y (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query select x1, y1 from(select x as x1, y as y1 from fact_daily where ds ='1') subq where x1 = 484 @@ -630,7 +566,7 @@ name: default.fact_daily name: default.fact_daily Truncated Path -> Alias: - /fact_tz/ds=1/x=484 [$hdt$_0:$hdt$_0:fact_daily] + /fact_tz/ds=1/x=484 [$hdt$_0:fact_daily] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -822,7 +758,7 @@ name: default.fact_daily name: default.fact_daily Truncated Path -> Alias: - /fact_tz/ds=1/x=484 [$hdt$_0:$hdt$_0:fact_daily] + /fact_tz/ds=1/x=484 [$hdt$_0:fact_daily] Needs Tagging: false Reduce Operator Tree: Group By Operator Index: ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out =================================================================== --- ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/list_bucket_query_oneskew_3.q.out (working copy) @@ -237,52 +237,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: fact_daily - Statistics: Num rows: 29 Data size: 117 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (not (x = 86)) (type: boolean) - Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: x (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -326,108 +288,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily -#### A masked pattern was here #### - Partition - base file name: x=238 - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 1 - properties: - COLUMN_STATS_ACCURATE false - bucket_count -1 - columns x,y,z - columns.comments - columns.types int:string:string -#### A masked pattern was here #### - name default.fact_daily - numFiles 3 - numRows -1 - partition_columns ds - partition_columns.types string - rawDataSize -1 - serialization.ddl struct fact_daily { i32 x, string y, string z} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 117 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - EXTERNAL TRUE - bucket_count -1 - columns x,y,z - columns.comments - columns.types int:string:string -#### A masked pattern was here #### - name default.fact_daily - partition_columns ds - partition_columns.types string - serialization.ddl struct fact_daily { i32 x, string y, string z} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.fact_daily - name: default.fact_daily -#### A masked pattern was here #### - Partition - base file name: x=484 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 1 - properties: - COLUMN_STATS_ACCURATE false - bucket_count -1 - columns x,y,z - columns.comments - columns.types int:string:string -#### A masked pattern was here #### - name default.fact_daily - numFiles 3 - numRows -1 - partition_columns ds - partition_columns.types string - rawDataSize -1 - serialization.ddl struct fact_daily { i32 x, string y, string z} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 117 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - EXTERNAL TRUE - bucket_count -1 - columns x,y,z - columns.comments - columns.types int:string:string -#### A masked pattern was here #### - name default.fact_daily - partition_columns ds - partition_columns.types string - serialization.ddl struct fact_daily { i32 x, string y, string z} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.fact_daily - name: default.fact_daily - Truncated Path -> Alias: - /fact_tz/ds=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [$hdt$_0:fact_daily] - /fact_tz/ds=1/x=238 [$hdt$_0:fact_daily] - /fact_tz/ds=1/x=484 [$hdt$_0:fact_daily] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: fact_daily + Statistics: Num rows: 29 Data size: 117 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (not (x = 86)) (type: boolean) + Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: x (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 15 
Data size: 60 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: -- List Bucketing Query SELECT x FROM fact_daily WHERE ds='1' and not (x = 86) Index: ql/src/test/results/clientpositive/load_dyn_part14.q.out =================================================================== --- ql/src/test/results/clientpositive/load_dyn_part14.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/load_dyn_part14.q.out (working copy) @@ -76,22 +76,22 @@ Select Operator expressions: 'k1' (type: string), UDFToString(null) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 135000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 85000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -104,10 +104,10 @@ Map Operator Tree: TableScan Union - Statistics: Num rows: 6 Data size: 1222 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1222 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -115,10 +115,10 @@ name: default.nzhang_part14 TableScan Union - Statistics: Num rows: 6 Data size: 1222 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1222 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -126,10 +126,10 @@ name: default.nzhang_part14 TableScan Union - Statistics: Num rows: 6 Data size: 1222 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1222 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Index: 
ql/src/test/results/clientpositive/louter_join_ppr.q.out =================================================================== --- ql/src/test/results/clientpositive/louter_join_ppr.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/louter_join_ppr.q.out (working copy) @@ -1149,9 +1149,9 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_0:$hdt$_1:$hdt$_1:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:$hdt$_0:b] + /src [$hdt$_1:$hdt$_1:a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] Needs Tagging: true Reduce Operator Tree: Join Operator Index: ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (working copy) @@ -64,14 +64,14 @@ Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:src + $hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:$hdt$_2:src1 + $hdt$_1:$hdt$_2:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:src + $hdt$_0:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -89,7 +89,7 @@ 0 _col0 (type: string) 1 _col1 (type: string) Position of Big Table: 1 - $hdt$_0:$hdt$_1:$hdt$_2:src1 + $hdt$_1:$hdt$_2:src1 TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -444,10 +444,10 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_1:$hdt$_1:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_1:$hdt$_1:srcpart] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:$hdt$_1:$hdt$_1:srcpart] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:$hdt$_1:$hdt$_1:srcpart] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:srcpart] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:$hdt$_1:srcpart] + /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:$hdt$_1:srcpart] + /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:$hdt$_1:srcpart] Stage: Stage-0 Fetch Operator @@ -470,14 +470,14 @@ Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:src + $hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:$hdt$_2:src1 + $hdt$_1:$hdt$_2:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:src + $hdt$_0:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -492,7 +492,7 @@ keys: 0 _col0 (type: string) 1 _col1 (type: string) - $hdt$_0:$hdt$_1:$hdt$_2:src1 + $hdt$_1:$hdt$_2:src1 TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/mapjoin_subquery.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_subquery.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/mapjoin_subquery.q.out (working copy) @@ -27,14 +27,14 @@ Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:z + $hdt$_0:z Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:z + $hdt$_0:z TableScan alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE @@ -49,7 +49,7 @@ keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -270,14 +270,14 @@ Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:z + $hdt$_0:z Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:z + $hdt$_0:z TableScan alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -292,7 +292,7 @@ keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/multiMapJoin2.q.out =================================================================== --- ql/src/test/results/clientpositive/multiMapJoin2.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/multiMapJoin2.q.out (working copy) @@ -761,11 +761,11 @@ Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:y1 + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:y1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:y1 + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:y1 TableScan alias: y1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -833,11 +833,11 @@ Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$INTNAME1 + $hdt$_0:$INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$INTNAME1 + $hdt$_0:$INTNAME1 TableScan HashTable Sink Operator keys: @@ -917,11 +917,11 @@ Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$INTNAME + $hdt$_0:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$INTNAME + $hdt$_0:$INTNAME TableScan HashTable Sink Operator keys: @@ -993,11 +993,11 @@ Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:y1 + $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:y1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:y1 + $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:y1 TableScan alias: y1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1159,14 +1159,14 @@ Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:y1 + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:y1 Fetch Operator limit: -1 - $hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:y1 + $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:y1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:y1 + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:y1 TableScan alias: y1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1181,7 +1181,7 @@ keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:y1 + $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:y1 TableScan alias: y1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1488,11 +1488,11 @@ Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$INTNAME1 + $hdt$_0:$INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$INTNAME1 + $hdt$_0:$INTNAME1 TableScan 
HashTable Sink Operator keys: @@ -1575,11 +1575,11 @@ Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$INTNAME + $hdt$_0:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$INTNAME + $hdt$_0:$INTNAME TableScan HashTable Sink Operator keys: @@ -1658,11 +1658,11 @@ Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x1 + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x1 + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x1 TableScan alias: x1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1823,11 +1823,11 @@ Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x1 + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x1 + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_1:x1 TableScan alias: x1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -2218,11 +2218,11 @@ Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: - null-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:a + null-subquery1:$hdt$_0-subquery1:$hdt$_1:$hdt$_1:$hdt$_1:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - null-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:a + null-subquery1:$hdt$_0-subquery1:$hdt$_1:$hdt$_1:$hdt$_1:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2287,14 +2287,14 @@ Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - null-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_0:a + null-subquery1:$hdt$_0-subquery1:$hdt$_0:a Fetch Operator limit: -1 - null-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_0:a + null-subquery2:$hdt$_0-subquery2:$hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - null-subquery1:$hdt$_0-subquery1:$hdt$_0:$hdt$_0:a + null-subquery1:$hdt$_0-subquery1:$hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2309,7 +2309,7 @@ keys: 0 _col0 (type: string) 1 _col0 (type: string) - null-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_0:a + null-subquery2:$hdt$_0-subquery2:$hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2370,11 +2370,11 @@ Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: - null-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:a + null-subquery2:$hdt$_0-subquery2:$hdt$_1:$hdt$_1:$hdt$_1:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - null-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:a + null-subquery2:$hdt$_0-subquery2:$hdt$_1:$hdt$_1:$hdt$_1:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/non_native_window_udf.q.out =================================================================== --- ql/src/test/results/clientpositive/non_native_window_udf.q.out (revision 0) +++ ql/src/test/results/clientpositive/non_native_window_udf.q.out (working copy) @@ -0,0 +1,52 @@ +PREHOOK: query: create temporary function mylastval as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: mylastval +POSTHOOK: query: create temporary function mylastval as 
'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: mylastval +PREHOOK: query: select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l, +mylastval(p_size, false) over w1 as m +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l, +mylastval(p_size, false) over w1 as m +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 2 2 34 34 +Manufacturer#1 almond antique burnished rose metallic 2 2 2 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 34 34 2 28 28 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 2 42 42 +Manufacturer#1 almond aquamarine burnished black steel 28 28 34 42 42 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 6 42 42 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 2 2 +Manufacturer#2 almond antique violet turquoise frosted 40 40 14 25 25 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 14 18 18 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 40 18 18 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 2 18 18 +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 19 19 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 17 1 1 +Manufacturer#3 almond antique metallic orange dim 19 19 17 45 45 +Manufacturer#3 almond antique misty red olive 1 1 14 45 45 +Manufacturer#3 almond antique olive coral navajo 45 45 19 45 45 +Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 27 27 +Manufacturer#4 almond antique violet mint lemon 39 39 10 7 7 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 10 12 12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 39 12 12 +Manufacturer#4 almond azure aquamarine papaya violet 12 12 27 12 12 +Manufacturer#5 almond antique blue firebrick mint 31 31 31 2 2 +Manufacturer#5 almond antique medium spring khaki 6 6 31 46 46 +Manufacturer#5 almond antique sky peru orange 2 2 31 23 23 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 6 23 23 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 2 23 23 Index: ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out =================================================================== --- ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out (working copy) @@ -20,20 +20,16 @@ alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (UDFToDouble(key) + 1.0) (type: double) - outputColumnNames: _col0 + expressions: (UDFToDouble(key) + 1.0) (type: double), ((UDFToDouble(key) + 1.0) + 1.0) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - 
Select Operator - expressions: _col0 (type: double), (_col0 + 1.0) (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/parquet_columnar.q.out =================================================================== --- ql/src/test/results/clientpositive/parquet_columnar.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/parquet_columnar.q.out (working copy) @@ -11,8 +11,8 @@ POSTHOOK: query: DROP TABLE IF EXISTS parquet_columnar_renamed POSTHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE parquet_columnar_access_stage ( - s string, - i int, + s string, + i int, f float ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' @@ -20,8 +20,8 @@ PREHOOK: Output: database:default PREHOOK: Output: default@parquet_columnar_access_stage POSTHOOK: query: CREATE TABLE parquet_columnar_access_stage ( - s string, - i int, + s string, + i int, f float ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' @@ -32,7 +32,8 @@ s string, x int, y int, - f float + f float, + address struct<intVals:int,strVals:string> ) STORED AS PARQUET PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -41,7 +42,8 @@ s string, x int, y int, - f float + f float, + address struct<intVals:int,strVals:string> ) STORED AS PARQUET POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default @@ -54,14 +56,17 @@ POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@parquet_columnar_access_stage -PREHOOK: query: INSERT OVERWRITE TABLE parquet_columnar_access SELECT s, i, (i + 1), f FROM parquet_columnar_access_stage +PREHOOK: query: INSERT OVERWRITE TABLE parquet_columnar_access SELECT s, i, (i + 1), f, named_struct('intVals', +i,'strVals',s) FROM parquet_columnar_access_stage PREHOOK: type: QUERY PREHOOK: Input: default@parquet_columnar_access_stage PREHOOK: Output: default@parquet_columnar_access -POSTHOOK: query: INSERT OVERWRITE TABLE parquet_columnar_access SELECT s, i, (i + 1), f FROM parquet_columnar_access_stage +POSTHOOK: query: INSERT OVERWRITE TABLE parquet_columnar_access SELECT s, i, (i + 1), f, named_struct('intVals', +i,'strVals',s) FROM parquet_columnar_access_stage POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_columnar_access_stage POSTHOOK: Output: default@parquet_columnar_access +POSTHOOK: Lineage: parquet_columnar_access.address EXPRESSION [(parquet_columnar_access_stage)parquet_columnar_access_stage.FieldSchema(name:i, type:int, comment:null), (parquet_columnar_access_stage)parquet_columnar_access_stage.FieldSchema(name:s, type:string, comment:null), ] POSTHOOK: Lineage: parquet_columnar_access.f SIMPLE [(parquet_columnar_access_stage)parquet_columnar_access_stage.FieldSchema(name:f, type:float, comment:null), ] POSTHOOK: Lineage: parquet_columnar_access.s SIMPLE [(parquet_columnar_access_stage)parquet_columnar_access_stage.FieldSchema(name:s, type:string, comment:null), ] POSTHOOK: Lineage:
parquet_columnar_access.x SIMPLE [(parquet_columnar_access_stage)parquet_columnar_access_stage.FieldSchema(name:i, type:int, comment:null), ] @@ -74,27 +79,27 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_columnar_access #### A masked pattern was here #### -1abc00 1 2 1.0 -1def01 2 3 1.1 -1ghi02 3 4 1.2 -1jkl03 1 2 1.3 -1mno04 2 3 1.4 -1pqr05 3 4 1.0 -1stu06 1 2 1.1 -1vwx07 2 3 1.2 -1yza08 3 4 1.3 -1bcd09 1 2 1.4 -1efg10 2 3 1.0 -1hij11 3 4 1.1 -1klm12 1 2 1.2 -1nop13 2 3 1.3 -1qrs14 3 4 1.4 -1tuv15 1 2 1.0 -1wxy16 2 3 1.1 -1zab17 3 4 1.2 -1cde18 1 2 1.3 -1fgh19 2 3 1.4 -1ijk20 3 4 1.0 +1abc00 1 2 1.0 {"intVals":1,"strVals":"1abc00"} +1def01 2 3 1.1 {"intVals":2,"strVals":"1def01"} +1ghi02 3 4 1.2 {"intVals":3,"strVals":"1ghi02"} +1jkl03 1 2 1.3 {"intVals":1,"strVals":"1jkl03"} +1mno04 2 3 1.4 {"intVals":2,"strVals":"1mno04"} +1pqr05 3 4 1.0 {"intVals":3,"strVals":"1pqr05"} +1stu06 1 2 1.1 {"intVals":1,"strVals":"1stu06"} +1vwx07 2 3 1.2 {"intVals":2,"strVals":"1vwx07"} +1yza08 3 4 1.3 {"intVals":3,"strVals":"1yza08"} +1bcd09 1 2 1.4 {"intVals":1,"strVals":"1bcd09"} +1efg10 2 3 1.0 {"intVals":2,"strVals":"1efg10"} +1hij11 3 4 1.1 {"intVals":3,"strVals":"1hij11"} +1klm12 1 2 1.2 {"intVals":1,"strVals":"1klm12"} +1nop13 2 3 1.3 {"intVals":2,"strVals":"1nop13"} +1qrs14 3 4 1.4 {"intVals":3,"strVals":"1qrs14"} +1tuv15 1 2 1.0 {"intVals":1,"strVals":"1tuv15"} +1wxy16 2 3 1.1 {"intVals":2,"strVals":"1wxy16"} +1zab17 3 4 1.2 {"intVals":3,"strVals":"1zab17"} +1cde18 1 2 1.3 {"intVals":1,"strVals":"1cde18"} +1fgh19 2 3 1.4 {"intVals":2,"strVals":"1fgh19"} +1ijk20 3 4 1.0 {"intVals":3,"strVals":"1ijk20"} PREHOOK: query: ALTER TABLE parquet_columnar_access REPLACE COLUMNS (s1 string, x1 int, y1 int, f1 float) PREHOOK: type: ALTERTABLE_REPLACECOLS PREHOOK: Input: default@parquet_columnar_access Index: ql/src/test/results/clientpositive/parquet_schema_evolution.q.out =================================================================== --- ql/src/test/results/clientpositive/parquet_schema_evolution.q.out (revision 0) +++ ql/src/test/results/clientpositive/parquet_schema_evolution.q.out (working copy) @@ -0,0 +1,141 @@ +PREHOOK: query: -- Some tables might have extra columns and struct elements on the schema than the on Parquet schema; +-- This is called 'schema evolution' as the Parquet file is not ready yet for such new columns; +-- Hive should support this schema, and return NULL values instead; + +DROP TABLE NewStructField +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Some tables might have extra columns and struct elements on the schema than the on Parquet schema; +-- This is called 'schema evolution' as the Parquet file is not ready yet for such new columns; +-- Hive should support this schema, and return NULL values instead; + +DROP TABLE NewStructField +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE NewStructFieldTable +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE NewStructFieldTable +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE NewStructField(a struct, a2:struct>) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@NewStructField +POSTHOOK: query: CREATE TABLE NewStructField(a struct, a2:struct>) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@NewStructField +PREHOOK: query: INSERT OVERWRITE TABLE NewStructField SELECT named_struct('a1', map('k1','v1'), 'a2', named_struct('e1',5)) FROM srcpart LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@newstructfield +POSTHOOK: query: INSERT OVERWRITE TABLE NewStructField SELECT named_struct('a1', map('k1','v1'), 'a2', named_struct('e1',5)) FROM srcpart LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@newstructfield +POSTHOOK: Lineage: newstructfield.a EXPRESSION [] +PREHOOK: query: DESCRIBE NewStructField +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@newstructfield +POSTHOOK: query: DESCRIBE NewStructField +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@newstructfield +a struct,a2:struct> +PREHOOK: query: SELECT * FROM NewStructField +PREHOOK: type: QUERY +PREHOOK: Input: default@newstructfield +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM NewStructField +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newstructfield +#### A masked pattern was here #### +{"a1":{"k1":"v1"},"a2":{"e1":5}} +{"a1":{"k1":"v1"},"a2":{"e1":5}} +{"a1":{"k1":"v1"},"a2":{"e1":5}} +{"a1":{"k1":"v1"},"a2":{"e1":5}} +{"a1":{"k1":"v1"},"a2":{"e1":5}} +PREHOOK: query: -- Adds new fields to the struct types +ALTER TABLE NewStructField REPLACE COLUMNS (a struct, a2:struct, a3:int>, b int) +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@newstructfield +PREHOOK: Output: default@newstructfield +POSTHOOK: query: -- Adds new fields to the struct types +ALTER TABLE NewStructField REPLACE COLUMNS (a struct, a2:struct, a3:int>, b int) +POSTHOOK: type: ALTERTABLE_REPLACECOLS +POSTHOOK: Input: default@newstructfield +POSTHOOK: Output: default@newstructfield +PREHOOK: query: DESCRIBE NewStructField +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@newstructfield +POSTHOOK: query: DESCRIBE NewStructField +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@newstructfield +a struct,a2:struct,a3:int> +b int +PREHOOK: query: SELECT * FROM NewStructField +PREHOOK: type: QUERY +PREHOOK: Input: default@newstructfield +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM NewStructField +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newstructfield +#### A masked pattern was here #### +{"a1":{"k1":"v1"},"a2":{"e1":5,"e2":null},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5,"e2":null},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5,"e2":null},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5,"e2":null},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5,"e2":null},"a3":null} NULL +PREHOOK: query: -- Makes sure that new parquet tables contain the new struct field +CREATE TABLE NewStructFieldTable STORED AS PARQUET AS SELECT * FROM NewStructField +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@newstructfield +PREHOOK: Output: database:default +PREHOOK: Output: default@NewStructFieldTable +POSTHOOK: query: -- Makes sure that new parquet tables contain the new struct field +CREATE TABLE NewStructFieldTable STORED AS PARQUET AS SELECT * FROM NewStructField +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@newstructfield +POSTHOOK: Output: database:default +POSTHOOK: Output: default@NewStructFieldTable +PREHOOK: query: DESCRIBE NewStructFieldTable 
+PREHOOK: type: DESCTABLE +PREHOOK: Input: default@newstructfieldtable +POSTHOOK: query: DESCRIBE NewStructFieldTable +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@newstructfieldtable +a struct,a2:struct,a3:int> +b int +PREHOOK: query: SELECT * FROM NewStructFieldTable +PREHOOK: type: QUERY +PREHOOK: Input: default@newstructfieldtable +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM NewStructFieldTable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newstructfieldtable +#### A masked pattern was here #### +{"a1":{"k1":"v1"},"a2":{"e1":5,"e2":null},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5,"e2":null},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5,"e2":null},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5,"e2":null},"a3":null} NULL +{"a1":{"k1":"v1"},"a2":{"e1":5,"e2":null},"a3":null} NULL +PREHOOK: query: DROP TABLE NewStructField +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@newstructfield +PREHOOK: Output: default@newstructfield +POSTHOOK: query: DROP TABLE NewStructField +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@newstructfield +POSTHOOK: Output: default@newstructfield +PREHOOK: query: DROP TABLE NewStructFieldTable +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@newstructfieldtable +PREHOOK: Output: default@newstructfieldtable +POSTHOOK: query: DROP TABLE NewStructFieldTable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@newstructfieldtable +POSTHOOK: Output: default@newstructfieldtable Index: ql/src/test/results/clientpositive/parquet_table_with_subschema.q.out =================================================================== --- ql/src/test/results/clientpositive/parquet_table_with_subschema.q.out (revision 0) +++ ql/src/test/results/clientpositive/parquet_table_with_subschema.q.out (working copy) @@ -0,0 +1,47 @@ +PREHOOK: query: -- Sometimes, the user wants to create a table from just a portion of the file schema; +-- This test makes sure that this scenario works; + +DROP TABLE test +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Sometimes, the user wants to create a table from just a portion of the file schema; +-- This test makes sure that this scenario works; + +DROP TABLE test +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- Current file schema is: (id int, name string, address struct); +-- Creates a table from just a portion of the file schema, including struct elements (test lower/upper case as well) +CREATE TABLE test (Name string, address struct) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +POSTHOOK: query: -- Current file schema is: (id int, name string, address struct); +-- Creates a table from just a portion of the file schema, including struct elements (test lower/upper case as well) +CREATE TABLE test (Name string, address struct) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/HiveGroup.parquet' OVERWRITE INTO TABLE test +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/HiveGroup.parquet' OVERWRITE INTO TABLE test +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test +PREHOOK: query: SELECT * FROM test +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A 
masked pattern was here #### +Roger {"Zip":"87366","Street":"Congress Ave."} +PREHOOK: query: DROP TABLE test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test +PREHOOK: Output: default@test +POSTHOOK: query: DROP TABLE test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test +POSTHOOK: Output: default@test Index: ql/src/test/results/clientpositive/pcr.q.out =================================================================== --- ql/src/test/results/clientpositive/pcr.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/pcr.q.out (working copy) @@ -475,9 +475,9 @@ name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds=2000-04-08 [$hdt$_0:pcr_t1] - /pcr_t1/ds=2000-04-09 [$hdt$_0:pcr_t1] - /pcr_t1/ds=2000-04-10 [$hdt$_0:pcr_t1] + /pcr_t1/ds=2000-04-08 [pcr_t1] + /pcr_t1/ds=2000-04-09 [pcr_t1] + /pcr_t1/ds=2000-04-10 [pcr_t1] Needs Tagging: false Reduce Operator Tree: Select Operator @@ -1798,8 +1798,8 @@ name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds=2000-04-08 [$hdt$_0:pcr_t1] - /pcr_t1/ds=2000-04-09 [$hdt$_0:pcr_t1] + /pcr_t1/ds=2000-04-08 [pcr_t1] + /pcr_t1/ds=2000-04-09 [pcr_t1] Needs Tagging: false Reduce Operator Tree: Select Operator @@ -2003,8 +2003,8 @@ name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds=2000-04-08 [$hdt$_0:pcr_t1] - /pcr_t1/ds=2000-04-09 [$hdt$_0:pcr_t1] + /pcr_t1/ds=2000-04-08 [pcr_t1] + /pcr_t1/ds=2000-04-09 [pcr_t1] Needs Tagging: false Reduce Operator Tree: Select Operator @@ -2293,9 +2293,9 @@ name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds=2000-04-08 [$hdt$_0:pcr_t1] - /pcr_t1/ds=2000-04-09 [$hdt$_0:pcr_t1] - /pcr_t1/ds=2000-04-10 [$hdt$_0:pcr_t1] + /pcr_t1/ds=2000-04-08 [pcr_t1] + /pcr_t1/ds=2000-04-09 [pcr_t1] + /pcr_t1/ds=2000-04-10 [pcr_t1] Needs Tagging: false Reduce Operator Tree: Select Operator @@ -5325,7 +5325,7 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] Needs Tagging: false Reduce Operator Tree: Select Operator Index: ql/src/test/results/clientpositive/ppd_vc.q.out =================================================================== --- ql/src/test/results/clientpositive/ppd_vc.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/ppd_vc.q.out (working copy) @@ -258,10 +258,10 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out =================================================================== --- ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out (working copy) @@ -197,8 +197,8 @@ name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] Stage: Stage-0 Fetch Operator @@ -521,10 +521,10 @@ name: default.srcpart name: default.srcpart 
Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:srcpart] + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out =================================================================== --- ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out (working copy) @@ -253,7 +253,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) @@ -572,7 +572,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) @@ -746,7 +746,7 @@ Map Reduce Map Operator Tree: TableScan - alias: lastyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: lastyear:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (year(l_shipdate) = 1997) (type: boolean) @@ -830,7 +830,7 @@ Map Reduce Map Operator Tree: TableScan - alias: thisyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: thisyear:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (year(l_shipdate) = 1998) (type: boolean) @@ -900,7 +900,7 @@ Map Reduce Map Operator Tree: TableScan - alias: null-subquery1:dummy-subquery1:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: null-subquery1:dummy-subquery1:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) @@ -1063,7 +1063,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__tbl_tbl_key_idx__ + alias: default__tbl_tbl_key_idx__ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) @@ -1161,7 +1161,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__tbl_tbl_key_idx__ + alias: default__tbl_tbl_key_idx__ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) @@ -2252,7 +2252,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__tblpart_tbl_part_index__ + alias: default__tblpart_tbl_part_index__ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -2445,7 +2445,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__tbl_tbl_key_idx__ + alias: 
default__tbl_tbl_key_idx__ Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) Index: ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out =================================================================== --- ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_1.q.out (working copy) @@ -253,7 +253,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: $hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) @@ -572,7 +572,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: $hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: year(l_shipdate) (type: int), month(l_shipdate) (type: int), l_shipdate (type: string), _count_of_l_shipdate (type: bigint) @@ -746,7 +746,7 @@ Map Reduce Map Operator Tree: TableScan - alias: lastyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: lastyear:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (year(l_shipdate) = 1997) (type: boolean) @@ -830,7 +830,7 @@ Map Reduce Map Operator Tree: TableScan - alias: thisyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: thisyear:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (year(l_shipdate) = 1998) (type: boolean) @@ -900,7 +900,7 @@ Map Reduce Map Operator Tree: TableScan - alias: null-subquery1:$hdt$_0-subquery1:$hdt$_0:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: null-subquery1:$hdt$_0-subquery1:$hdt$_0:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) @@ -1063,7 +1063,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0:default.default__tbl_tbl_key_idx__ + alias: $hdt$_0:default__tbl_tbl_key_idx__ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) @@ -1161,7 +1161,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0:default.default__tbl_tbl_key_idx__ + alias: $hdt$_0:default__tbl_tbl_key_idx__ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) @@ -2260,7 +2260,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0:$hdt$_0:default.default__tblpart_tbl_part_index__ + alias: $hdt$_0:default__tblpart_tbl_part_index__ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -2453,7 +2453,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0:default.default__tbl_tbl_key_idx__ + alias: $hdt$_0:default__tbl_tbl_key_idx__ 
Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) Index: ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out =================================================================== --- ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/ql_rewrite_gbtoidx_cbo_2.q.out (working copy) @@ -176,7 +176,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + alias: $hdt$_0:default__lineitem_ix_lineitem_ix_l_orderkey_idx__ Statistics: Num rows: 26 Data size: 2604 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), _count_of_l_orderkey (type: bigint) @@ -1223,7 +1223,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + alias: $hdt$_0:default__lineitem_ix_lineitem_ix_l_orderkey_idx__ Statistics: Num rows: 26 Data size: 2604 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), _count_of_l_orderkey (type: bigint) @@ -1907,7 +1907,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + alias: $hdt$_0:$hdt$_0:default__lineitem_ix_lineitem_ix_l_orderkey_idx__ Statistics: Num rows: 26 Data size: 2604 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (l_orderkey < 7) (type: boolean) @@ -2072,7 +2072,7 @@ Map Reduce Map Operator Tree: TableScan - alias: taba:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + alias: taba:default__lineitem_ix_lineitem_ix_l_orderkey_idx__ Statistics: Num rows: 26 Data size: 2604 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (l_orderkey < 7) (type: boolean) @@ -2148,7 +2148,7 @@ Map Reduce Map Operator Tree: TableScan - alias: tabb:default.default__src_src_key_idx__ + alias: tabb:default__src_src_key_idx__ Statistics: Num rows: 309 Data size: 23663 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), _count_of_key (type: bigint) @@ -2814,7 +2814,7 @@ Map Reduce Map Operator Tree: TableScan - alias: taba:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + alias: taba:default__lineitem_ix_lineitem_ix_l_orderkey_idx__ Statistics: Num rows: 26 Data size: 2604 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (l_orderkey < 7) (type: boolean) @@ -2890,7 +2890,7 @@ Map Reduce Map Operator Tree: TableScan - alias: tabb:default.default__src_src_key_idx__ + alias: tabb:default__src_src_key_idx__ Statistics: Num rows: 309 Data size: 23663 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < '2') (type: boolean) @@ -3192,7 +3192,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + alias: $hdt$_0:default__lineitem_ix_lineitem_ix_l_orderkey_idx__ Statistics: Num rows: 26 Data size: 2604 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), (l_orderkey + 1) (type: int), _count_of_l_orderkey (type: bigint) @@ -3282,7 +3282,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + alias: $hdt$_0:default__lineitem_ix_lineitem_ix_l_orderkey_idx__ Statistics: Num rows: 26 Data size: 2604 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: (l_orderkey + 2) (type: int), l_orderkey (type: int), _count_of_l_orderkey (type: bigint) @@ -3404,7 +3404,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0-subquery1:$hdt$_0-subquery1:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_orderkey_idx__ + alias: $hdt$_0-subquery1:$hdt$_0-subquery1:$hdt$_0:default__lineitem_ix_lineitem_ix_l_orderkey_idx__ Statistics: Num rows: 26 Data size: 2604 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (l_orderkey < 7) (type: boolean) @@ -3495,7 +3495,7 @@ Map Reduce Map Operator Tree: TableScan - alias: $hdt$_0-subquery2:$hdt$_0-subquery2:$hdt$_0:default.default__lineitem_ix_lineitem_ix_l_partkey_idx__ + alias: $hdt$_0-subquery2:$hdt$_0-subquery2:$hdt$_0:default__lineitem_ix_lineitem_ix_l_partkey_idx__ Statistics: Num rows: 100 Data size: 8937 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (l_partkey < 10) (type: boolean) Index: ql/src/test/results/clientpositive/rand_partitionpruner3.q.out =================================================================== --- ql/src/test/results/clientpositive/rand_partitionpruner3.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/rand_partitionpruner3.q.out (working copy) @@ -57,52 +57,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((rand(1) < 0.1) and (not ((UDFToDouble(key) > 50.0) or (UDFToDouble(key) < 10.0)))) (type: boolean) - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -146,14 +108,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=12 [a] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: a + Statistics: Num rows: 500 Data size: 
5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((rand(1) < 0.1) and (not ((UDFToDouble(key) > 50.0) or (UDFToDouble(key) < 10.0)))) (type: boolean) + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' PREHOOK: type: QUERY @@ -221,52 +189,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (not ((UDFToDouble(key) > 50.0) or (UDFToDouble(key) < 10.0))) (type: boolean) - Statistics: Num rows: 168 Data size: 1784 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 168 Data size: 1784 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 168 Data size: 1784 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -310,14 +240,20 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=12 [a] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (not ((UDFToDouble(key) > 50.0) or (UDFToDouble(key) < 10.0))) (type: boolean) + Statistics: Num rows: 168 Data size: 1784 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 168 Data size: 1784 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: 
select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/show_functions.q.out =================================================================== --- ql/src/test/results/clientpositive/show_functions.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/show_functions.q.out (working copy) @@ -125,6 +125,7 @@ min minute month +months_between named_struct negative next_day @@ -326,6 +327,7 @@ min minute month +months_between xpath xpath_boolean xpath_double Index: ql/src/test/results/clientpositive/show_indexes_edge_cases.q.out =================================================================== --- ql/src/test/results/clientpositive/show_indexes_edge_cases.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/show_indexes_edge_cases.q.out (working copy) @@ -118,10 +118,10 @@ PREHOOK: type: SHOWINDEXES POSTHOOK: query: SHOW INDEXES ON show_idx_full POSTHOOK: type: SHOWINDEXES -idx_1 show_idx_full key default.default__show_idx_full_idx_1__ compact -idx_2 show_idx_full value1 default.default__show_idx_full_idx_2__ compact -idx_comment show_idx_full value2 default.default__show_idx_full_idx_comment__ compact index comment -idx_compound show_idx_full key, value1 default.default__show_idx_full_idx_compound__ compact +idx_1 show_idx_full key default__show_idx_full_idx_1__ compact +idx_2 show_idx_full value1 default__show_idx_full_idx_2__ compact +idx_comment show_idx_full value2 default__show_idx_full_idx_comment__ compact index comment +idx_compound show_idx_full key, value1 default__show_idx_full_idx_compound__ compact PREHOOK: query: EXPLAIN SHOW INDEXES ON show_idx_empty PREHOOK: type: SHOWINDEXES POSTHOOK: query: EXPLAIN SHOW INDEXES ON show_idx_empty Index: ql/src/test/results/clientpositive/show_indexes_syntax.q.out =================================================================== --- ql/src/test/results/clientpositive/show_indexes_syntax.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/show_indexes_syntax.q.out (working copy) @@ -53,7 +53,7 @@ PREHOOK: type: SHOWINDEXES POSTHOOK: query: SHOW INDEX ON show_idx_t1 POSTHOOK: type: SHOWINDEXES -idx_t1 show_idx_t1 key default.default__show_idx_t1_idx_t1__ compact +idx_t1 show_idx_t1 key default__show_idx_t1_idx_t1__ compact PREHOOK: query: EXPLAIN SHOW INDEXES ON show_idx_t1 PREHOOK: type: SHOWINDEXES @@ -79,7 +79,7 @@ PREHOOK: type: SHOWINDEXES POSTHOOK: query: SHOW INDEXES ON show_idx_t1 POSTHOOK: type: SHOWINDEXES -idx_t1 show_idx_t1 key default.default__show_idx_t1_idx_t1__ compact +idx_t1 show_idx_t1 key default__show_idx_t1_idx_t1__ compact PREHOOK: query: EXPLAIN SHOW FORMATTED INDEXES ON show_idx_t1 PREHOOK: type: SHOWINDEXES @@ -108,7 +108,7 @@ idx_name tab_name col_names idx_tab_name idx_type comment -idx_t1 show_idx_t1 key default.default__show_idx_t1_idx_t1__ compact +idx_t1 show_idx_t1 key default__show_idx_t1_idx_t1__ compact PREHOOK: query: DROP TABLE show_idx_t1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@show_idx_t1 Index: ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out (working copy) @@ -65,8 +65,8 @@ POSTHOOK: query: DESC avro_dec POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@avro_dec -name string from deserializer -value 
decimal(5,2) from deserializer +name string +value decimal(5,2) PREHOOK: query: INSERT OVERWRITE TABLE avro_dec SELECT name, value FROM dec PREHOOK: type: QUERY PREHOOK: Input: default@dec @@ -121,8 +121,8 @@ POSTHOOK: query: DESC avro_dec1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@avro_dec1 -name string from deserializer -value decimal(4,1) from deserializer +name string +value decimal(4,1) PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.avro' INTO TABLE avro_dec1 PREHOOK: type: LOAD #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/spark/avro_joins.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/avro_joins.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/spark/avro_joins.q.out (working copy) @@ -31,7 +31,7 @@ { "name":"extra_field", "type":"string", - "doc:":"an extra field not in the original file", + "doc":"an extra field not in the original file", "default":"fishfingers and custard" } ] @@ -72,7 +72,7 @@ { "name":"extra_field", "type":"string", - "doc:":"an extra field not in the original file", + "doc":"an extra field not in the original file", "default":"fishfingers and custard" } ] @@ -86,10 +86,10 @@ POSTHOOK: query: DESCRIBE doctors4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors4 -number int from deserializer -first_name string from deserializer -last_name string from deserializer -extra_field string from deserializer +number int Order of playing the role +first_name string first name of actor playing role +last_name string last name of actor playing role +extra_field string an extra field not in the original file PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4 PREHOOK: type: LOAD #### A masked pattern was here #### @@ -166,9 +166,9 @@ POSTHOOK: query: DESCRIBE episodes POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@episodes -title string from deserializer -air_date string from deserializer -doctor int from deserializer +title string episode title +air_date string initial date +doctor int main actor playing the Doctor in episode PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes PREHOOK: type: LOAD #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/spark/avro_joins_native.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/avro_joins_native.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/spark/avro_joins_native.q.out (working copy) @@ -28,9 +28,9 @@ POSTHOOK: query: DESCRIBE doctors4 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@doctors4 -number int from deserializer -first_name string from deserializer -last_name string from deserializer +number int Order of playing the role +first_name string first name of actor playing role +last_name string last name of actor playing role PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4 PREHOOK: type: LOAD #### A masked pattern was here #### @@ -61,9 +61,9 @@ POSTHOOK: query: DESCRIBE episodes POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@episodes -title string from deserializer -air_date string from deserializer -doctor int from deserializer +title string episode title +air_date string initial date +doctor int main actor playing the Doctor in episode PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes PREHOOK: 
type: LOAD #### A masked pattern was here #### Index: ql/src/test/results/clientpositive/spark/groupby3_map.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/groupby3_map.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/spark/groupby3_map.q.out (working copy) @@ -134,12 +134,32 @@ POSTHOOK: Lineage: dest1.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: query: SELECT +c1, +c2, +round(c3, 11) c3, +c4, +c5, +round(c6, 11) c6, +round(c7, 11) c7, +round(c8, 5) c8, +round(c9, 9) c9 +FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### -POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: query: SELECT +c1, +c2, +round(c3, 11) c3, +c4, +c5, +round(c6, 11) c6, +round(c7, 11) c7, +round(c8, 5) c8, +round(c9, 9) c9 +FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876 20469.01089779559 +130091.0 260.182 256.10355987055 98.0 0.0 142.92680950752 143.06995106519 20428.07288 20469.010897796 Index: ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx.q.out (working copy) @@ -264,7 +264,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) @@ -585,7 +585,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) @@ -755,7 +755,7 @@ Map 1 Map Operator Tree: TableScan - alias: lastyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: lastyear:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (year(l_shipdate) = 1997) (type: boolean) @@ -782,7 +782,7 @@ Map 4 Map Operator Tree: TableScan - alias: thisyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: thisyear:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (year(l_shipdate) = 1998) (type: boolean) @@ -909,7 +909,7 @@ Map 2 Map Operator Tree: TableScan - alias: null-subquery1:dummy-subquery1:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: null-subquery1:dummy-subquery1:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: 
string), _count_of_l_shipdate (type: bigint) @@ -1053,7 +1053,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__tbl_tbl_key_idx__ + alias: default__tbl_tbl_key_idx__ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) @@ -1163,7 +1163,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__tbl_tbl_key_idx__ + alias: default__tbl_tbl_key_idx__ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) @@ -2368,7 +2368,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__tblpart_tbl_part_index__ + alias: default__tblpart_tbl_part_index__ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -2563,7 +2563,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__tbl_tbl_key_idx__ + alias: default__tbl_tbl_key_idx__ Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) Index: ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/spark/ql_rewrite_gbtoidx_cbo_1.q.out (working copy) @@ -264,7 +264,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) @@ -585,7 +585,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) @@ -755,7 +755,7 @@ Map 1 Map Operator Tree: TableScan - alias: lastyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: lastyear:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (year(l_shipdate) = 1997) (type: boolean) @@ -782,7 +782,7 @@ Map 4 Map Operator Tree: TableScan - alias: thisyear:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: thisyear:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (year(l_shipdate) = 1998) (type: boolean) @@ -909,7 +909,7 @@ Map 2 Map Operator Tree: TableScan - alias: null-subquery1:dummy-subquery1:default.default__lineitem_ix_lineitem_ix_lshipdate_idx__ + alias: null-subquery1:dummy-subquery1:default__lineitem_ix_lineitem_ix_lshipdate_idx__ Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint) @@ -1053,7 +1053,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__tbl_tbl_key_idx__ + alias: default__tbl_tbl_key_idx__ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) @@ -1163,7 +1163,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__tbl_tbl_key_idx__ + alias: default__tbl_tbl_key_idx__ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) @@ -2368,7 +2368,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__tblpart_tbl_part_index__ + alias: default__tblpart_tbl_part_index__ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -2563,7 +2563,7 @@ Map 1 Map Operator Tree: TableScan - alias: default.default__tbl_tbl_key_idx__ + alias: default__tbl_tbl_key_idx__ Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), _count_of_key (type: bigint) Index: ql/src/test/results/clientpositive/spark/vector_between_in.q.out =================================================================== --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out (working copy) @@ -495,40 +495,40 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 1970-01-01 1970-01-01 1970-01-01 1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 PREHOOK: query: SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate PREHOOK: type: QUERY PREHOOK: Input: default@decimal_date_test @@ -537,2949 +537,2949 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### -1678-01-21 -1678-01-24 -1678-02-04 -1678-03-20 -1678-04-12 -1678-08-17 -1678-11-01 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-10-13 -1680-12-14 -1681-02-27 -1681-06-10 -1681-10-21 -1682-03-29 +1678-01-20 +1678-01-23 +1678-02-03 +1678-03-19 +1678-04-11 +1678-08-16 +1678-10-31 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-10-12 +1680-12-13 +1681-02-26 +1681-06-09 +1681-10-20 +1682-03-28 +1682-05-05 1682-05-06 -1682-05-07 -1682-11-12 -1682-11-12 -1682-11-12 -1682-11-12 -1682-11-12 -1682-11-12 -1683-03-27 -1683-06-08 -1683-06-08 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-07-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 
-1683-08-18 -1684-06-28 -1684-07-23 -1684-09-10 -1684-09-12 -1684-11-13 -1684-11-22 -1685-01-28 -1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-05-26 -1685-06-21 -1685-08-02 -1685-12-09 -1686-02-01 +1682-11-11 +1682-11-11 +1682-11-11 +1682-11-11 +1682-11-11 +1682-11-11 +1683-03-26 +1683-06-07 +1683-06-07 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-07-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1684-06-27 +1684-07-22 +1684-09-09 +1684-09-11 +1684-11-12 +1684-11-21 +1685-01-27 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-05-25 +1685-06-20 +1685-08-01 +1685-12-08 +1686-01-31 +1686-03-20 1686-03-21 -1686-03-22 -1686-04-02 -1686-11-30 -1686-12-03 -1686-12-03 -1686-12-03 -1686-12-03 -1686-12-03 -1686-12-03 -1687-02-02 -1687-03-05 -1687-03-24 -1687-05-24 -1687-07-16 -1687-09-29 -1687-10-25 -1687-11-07 -1687-11-19 -1687-12-17 -1688-07-24 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-08-03 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-12-30 -1689-01-31 -1689-09-24 -1689-10-30 -1690-01-28 -1690-03-13 -1690-03-20 -1690-05-28 -1690-09-21 -1690-10-01 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1691-02-10 -1691-07-18 -1691-08-09 -1691-11-14 -1691-11-14 -1691-11-14 -1691-11-14 -1691-11-14 -1691-11-14 -1691-11-14 -1692-05-13 -1692-11-14 -1693-03-18 -1693-07-16 -1693-12-17 -1693-12-27 -1694-06-30 -1694-08-05 -1694-11-18 -1694-11-18 -1694-11-18 -1695-05-20 -1695-10-19 -1695-11-04 -1695-11-11 -1696-02-17 -1696-02-27 -1696-03-24 -1696-04-09 -1696-05-03 -1696-05-18 -1696-05-28 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-26 -1696-11-04 -1696-12-17 -1697-01-25 -1697-05-02 -1697-05-28 -1697-08-30 -1697-10-13 -1697-12-10 -1698-03-02 -1698-04-29 -1698-05-06 -1698-05-26 -1698-05-26 -1698-06-12 -1698-08-09 -1698-11-25 -1698-12-10 -1698-12-10 -1698-12-10 -1699-03-10 -1699-03-14 -1699-04-18 -1699-09-28 -1700-02-27 -1700-04-11 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-05-01 -1700-05-18 -1700-05-18 -1700-05-18 -1700-05-18 -1700-05-18 -1700-06-24 -1700-08-09 -1700-08-14 -1700-09-23 -1700-11-20 -1700-11-24 -1701-02-18 -1701-03-21 -1701-05-30 -1701-07-24 +1686-04-01 +1686-11-29 +1686-12-02 +1686-12-02 +1686-12-02 +1686-12-02 +1686-12-02 +1686-12-02 +1687-02-01 +1687-03-04 +1687-03-23 +1687-05-23 +1687-07-15 +1687-09-28 +1687-10-24 +1687-11-06 +1687-11-18 +1687-12-16 +1688-07-23 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-08-02 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-12-29 +1689-01-30 +1689-09-23 +1689-10-29 +1690-01-27 +1690-03-12 +1690-03-19 +1690-05-27 +1690-09-20 +1690-09-30 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1691-02-09 +1691-07-17 +1691-08-08 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1692-05-12 +1692-11-13 +1693-03-17 
+1693-07-15 +1693-12-16 +1693-12-26 +1694-06-29 +1694-08-04 +1694-11-17 +1694-11-17 +1694-11-17 +1695-05-19 +1695-10-18 +1695-11-03 +1695-11-10 +1696-02-16 +1696-02-26 +1696-03-23 +1696-04-08 +1696-05-02 +1696-05-17 +1696-05-27 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-25 +1696-11-03 +1696-12-16 +1697-01-24 +1697-05-01 +1697-05-27 +1697-08-29 +1697-10-12 +1697-12-09 +1698-03-01 +1698-04-28 +1698-05-05 +1698-05-25 +1698-05-25 +1698-06-11 +1698-08-08 +1698-11-24 +1698-12-09 +1698-12-09 +1698-12-09 +1699-03-09 +1699-03-13 +1699-04-17 +1699-09-27 +1700-02-26 +1700-04-10 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-30 +1700-05-17 +1700-05-17 +1700-05-17 +1700-05-17 +1700-05-17 +1700-06-23 +1700-08-08 +1700-08-13 +1700-09-22 +1700-11-19 +1700-11-23 +1701-02-17 +1701-03-20 +1701-05-29 +1701-07-23 +1701-07-29 1701-07-30 -1701-07-31 -1701-09-19 -1701-10-25 -1701-11-03 -1702-01-09 -1702-01-09 -1702-01-09 -1702-01-09 -1702-05-16 -1702-06-04 -1702-07-24 -1702-10-04 -1703-01-30 -1703-02-13 -1703-02-21 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-06-16 -1703-08-18 +1701-09-18 +1701-10-24 +1701-11-02 +1702-01-08 +1702-01-08 +1702-01-08 +1702-01-08 +1702-05-15 +1702-06-03 +1702-07-23 +1702-10-03 +1703-01-29 +1703-02-12 +1703-02-20 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-06-15 +1703-08-17 +1703-09-02 +1703-09-02 +1703-09-02 +1703-09-02 +1703-09-02 1703-09-03 -1703-09-03 -1703-09-03 -1703-09-03 -1703-09-03 -1703-09-04 -1703-09-20 -1703-10-27 -1704-01-23 -1704-08-07 -1704-08-15 -1704-08-20 -1704-09-26 -1704-09-26 -1704-09-26 -1704-09-26 -1704-09-26 -1704-11-24 -1704-12-22 -1705-02-23 -1705-04-18 -1705-04-26 -1705-04-26 -1705-04-26 -1705-04-26 -1705-06-09 -1705-08-06 -1705-12-04 -1706-01-11 -1706-02-13 -1706-06-11 -1706-06-11 -1706-06-21 -1706-06-23 -1706-06-25 -1706-07-13 -1706-07-24 -1706-08-08 -1706-08-11 -1706-09-01 -1706-09-25 -1706-11-15 -1706-12-01 +1703-09-19 +1703-10-26 +1704-01-22 +1704-08-06 +1704-08-14 +1704-08-19 +1704-09-25 +1704-09-25 +1704-09-25 +1704-09-25 +1704-09-25 +1704-11-23 +1704-12-21 +1705-02-22 +1705-04-17 +1705-04-25 +1705-04-25 +1705-04-25 +1705-04-25 +1705-06-08 +1705-08-05 +1705-12-03 +1706-01-10 +1706-02-12 +1706-06-10 +1706-06-10 +1706-06-20 +1706-06-22 +1706-06-24 +1706-07-12 +1706-07-23 +1706-08-07 +1706-08-10 +1706-08-31 +1706-09-24 +1706-11-14 +1706-11-30 +1706-12-23 1706-12-24 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1707-03-05 -1707-04-17 -1707-05-06 -1707-11-23 -1708-02-13 -1708-04-14 -1708-06-25 -1708-07-22 -1708-08-06 -1708-09-22 -1708-10-16 -1708-11-05 -1708-12-31 -1709-03-02 -1709-04-22 -1709-05-15 -1709-09-16 -1710-01-08 -1710-04-30 -1710-05-29 -1710-06-22 -1710-08-02 -1710-09-09 -1710-09-30 -1710-11-25 -1711-01-13 -1711-05-11 -1711-05-24 -1711-08-05 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-10-20 -1711-12-05 -1712-02-02 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1707-03-04 +1707-04-16 +1707-05-05 +1707-11-22 +1708-02-12 +1708-04-13 +1708-06-24 +1708-07-21 +1708-08-05 
+1708-09-21 +1708-10-15 +1708-11-04 +1708-12-30 +1709-03-01 +1709-04-21 +1709-05-14 +1709-09-15 +1710-01-07 +1710-04-29 +1710-05-28 +1710-06-21 +1710-08-01 +1710-09-08 +1710-09-29 +1710-11-24 +1711-01-12 +1711-05-10 +1711-05-23 +1711-08-04 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-10-19 +1711-12-04 +1712-02-01 +1712-03-22 1712-03-23 -1712-03-24 -1712-03-26 -1712-05-14 -1712-10-10 -1712-10-10 -1712-10-10 -1712-10-10 -1712-10-10 -1712-12-19 -1713-02-24 -1713-06-02 -1713-06-22 -1713-07-06 -1713-08-19 -1713-08-24 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-27 -1714-12-17 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-03-05 -1715-03-09 -1715-06-22 -1715-07-25 -1715-09-20 -1715-11-10 -1716-05-30 -1716-06-03 -1716-06-07 -1716-07-19 +1712-03-25 +1712-05-13 +1712-10-09 +1712-10-09 +1712-10-09 +1712-10-09 +1712-10-09 +1712-12-18 +1713-02-23 +1713-06-01 +1713-06-21 +1713-07-05 +1713-08-18 +1713-08-23 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-10-26 +1714-12-16 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-03-04 +1715-03-08 +1715-06-21 +1715-07-24 +1715-09-19 +1715-11-09 +1716-05-29 +1716-06-02 +1716-06-06 +1716-07-18 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-16 -1717-02-18 -1717-07-30 -1717-10-18 -1717-11-06 -1717-12-01 -1717-12-15 -1717-12-26 -1717-12-31 -1718-02-08 -1718-02-25 -1718-03-12 -1718-03-28 -1718-09-09 -1718-10-10 -1718-10-10 -1718-10-10 -1718-10-10 -1718-10-10 -1718-10-10 -1718-11-12 -1718-11-15 -1718-12-29 -1719-04-17 -1719-04-22 -1719-05-16 -1719-08-24 -1719-09-22 -1719-12-09 -1719-12-26 -1720-03-02 -1720-03-02 -1720-03-02 -1720-03-02 -1720-03-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-09-05 -1720-10-24 -1720-10-24 -1720-10-24 -1720-10-24 -1720-10-24 -1721-01-23 -1721-03-13 -1721-04-30 -1721-05-14 -1721-06-07 -1721-06-19 -1721-07-12 -1721-08-18 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-02-10 +1717-02-17 +1717-07-29 +1717-10-17 +1717-11-05 +1717-11-30 +1717-12-14 +1717-12-25 +1717-12-30 +1718-02-07 +1718-02-24 +1718-03-11 +1718-03-27 +1718-09-08 +1718-10-09 +1718-10-09 +1718-10-09 +1718-10-09 +1718-10-09 +1718-10-09 +1718-11-11 +1718-11-14 +1718-12-28 +1719-04-16 +1719-04-21 +1719-05-15 +1719-08-23 +1719-09-21 +1719-12-08 +1719-12-25 +1720-03-01 +1720-03-01 +1720-03-01 +1720-03-01 +1720-03-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-09-04 +1720-10-23 +1720-10-23 +1720-10-23 +1720-10-23 +1720-10-23 +1721-01-22 +1721-03-12 +1721-04-29 +1721-05-13 +1721-06-06 +1721-06-18 +1721-07-11 +1721-08-17 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-02-09 +1722-02-24 1722-02-25 -1722-02-26 -1722-06-14 -1722-07-15 -1722-10-01 -1722-12-01 -1722-12-14 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-03-04 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 
-1723-05-19 -1723-06-21 -1723-07-24 -1723-08-02 -1723-10-30 -1723-12-01 -1724-01-26 +1722-06-13 +1722-07-14 +1722-09-30 +1722-11-30 +1722-12-13 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-03-03 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-05-18 +1723-06-20 +1723-07-23 +1723-08-01 +1723-10-29 +1723-11-30 +1724-01-25 +1724-03-26 1724-03-27 -1724-03-28 -1724-04-15 -1724-05-20 -1724-07-11 -1724-08-23 -1724-10-09 -1724-12-07 -1725-03-19 -1725-06-01 -1725-08-04 -1726-04-02 -1726-07-06 -1726-07-06 -1726-07-21 -1726-10-31 -1727-06-18 -1727-07-14 -1727-07-23 -1727-07-26 -1727-08-22 -1728-02-15 -1728-03-07 -1728-09-19 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-10 -1728-12-17 -1729-04-09 -1729-04-30 -1729-05-09 -1729-06-23 -1729-08-14 -1729-08-29 -1729-11-07 -1729-12-13 -1730-02-03 -1730-02-12 -1730-04-16 -1730-05-14 -1730-08-27 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-04-14 -1731-06-28 -1731-08-09 -1731-08-09 -1731-08-09 -1731-08-09 -1731-10-07 -1731-10-31 -1732-01-21 -1732-01-27 -1732-02-08 -1732-02-19 -1732-02-23 -1732-03-05 -1732-04-27 -1732-06-26 -1732-07-11 -1732-07-22 -1732-08-31 -1732-12-01 -1733-06-23 -1733-09-04 -1733-09-08 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1734-02-22 -1734-03-02 -1734-03-20 -1734-06-16 -1734-07-03 +1724-04-14 +1724-05-19 +1724-07-10 +1724-08-22 +1724-10-08 +1724-12-06 +1725-03-18 +1725-05-31 +1725-08-03 +1726-04-01 +1726-07-05 +1726-07-05 +1726-07-20 +1726-10-30 +1727-06-17 +1727-07-13 +1727-07-22 +1727-07-25 +1727-08-21 +1728-02-14 +1728-03-06 +1728-09-18 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-09 +1728-12-16 +1729-04-08 +1729-04-29 +1729-05-08 +1729-06-22 +1729-08-13 +1729-08-28 +1729-11-06 +1729-12-12 +1730-02-02 +1730-02-11 +1730-04-15 +1730-05-13 +1730-08-26 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-04-13 +1731-06-27 +1731-08-08 +1731-08-08 +1731-08-08 +1731-08-08 +1731-10-06 +1731-10-30 +1732-01-20 +1732-01-26 +1732-02-07 +1732-02-18 +1732-02-22 +1732-03-04 +1732-04-26 +1732-06-25 +1732-07-10 +1732-07-21 +1732-08-30 +1732-11-30 +1733-06-22 +1733-09-03 +1733-09-07 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1734-02-21 +1734-03-01 +1734-03-19 +1734-06-15 +1734-07-02 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-14 -1734-10-24 -1734-12-10 -1735-01-31 -1735-02-11 -1735-02-15 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-09-04 -1735-09-16 -1735-09-28 -1735-11-29 -1735-12-04 -1735-12-12 -1736-04-13 -1736-04-28 -1736-06-24 -1736-09-28 -1736-11-14 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1737-02-23 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-05-15 -1737-06-28 -1737-06-30 -1737-07-05 -1737-07-17 +1734-10-23 +1734-12-09 +1735-01-30 +1735-02-10 +1735-02-14 +1735-07-09 
+1735-07-09 +1735-07-09 +1735-07-09 +1735-07-09 +1735-07-09 +1735-07-09 +1735-09-03 +1735-09-15 +1735-09-27 +1735-11-28 +1735-12-03 +1735-12-11 +1736-04-12 +1736-04-27 +1736-06-23 +1736-09-27 +1736-11-13 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1737-02-22 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-05-14 +1737-06-27 +1737-06-29 +1737-07-04 +1737-07-16 +1737-08-01 1737-08-02 -1737-08-03 -1737-11-06 -1737-12-09 -1738-01-25 -1738-04-05 -1738-06-01 -1738-06-05 -1738-10-25 -1738-10-25 -1738-10-25 -1738-10-25 -1738-10-25 -1738-10-25 -1739-02-11 -1739-02-19 -1739-02-19 -1739-02-19 -1739-02-19 -1739-02-28 -1739-07-05 -1739-09-04 -1740-01-10 -1740-01-13 -1740-01-13 -1740-01-13 -1740-01-13 -1740-02-07 -1740-03-23 +1737-11-05 +1737-12-08 +1738-01-24 +1738-04-04 +1738-05-31 +1738-06-04 +1738-10-24 +1738-10-24 +1738-10-24 +1738-10-24 +1738-10-24 +1738-10-24 +1739-02-10 +1739-02-18 +1739-02-18 +1739-02-18 +1739-02-18 +1739-02-27 +1739-07-04 +1739-09-03 +1740-01-09 +1740-01-12 +1740-01-12 +1740-01-12 +1740-01-12 +1740-02-06 +1740-03-22 +1740-04-18 1740-04-19 -1740-04-20 -1740-07-13 -1740-11-24 -1740-11-28 -1741-04-14 -1741-06-02 -1741-08-16 -1741-08-27 -1741-09-11 -1741-11-26 -1741-11-26 -1741-12-31 -1742-06-07 -1742-12-09 -1742-12-18 -1742-12-26 -1743-01-11 -1743-01-16 -1743-01-16 -1743-01-16 -1743-01-20 -1743-02-03 -1743-02-10 -1743-12-14 -1744-01-03 -1744-04-14 -1744-09-14 -1744-09-19 -1744-09-24 -1744-12-05 -1744-12-05 -1744-12-05 -1744-12-05 -1745-02-09 -1745-03-15 -1745-05-13 -1745-06-13 -1745-08-21 -1745-10-28 -1745-10-30 -1746-01-20 -1746-01-26 -1746-02-16 -1746-03-18 -1746-07-28 -1746-09-18 -1746-10-02 -1746-12-21 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-04-16 -1747-06-09 -1747-07-29 -1747-09-24 -1747-11-08 -1747-11-27 -1748-03-21 -1748-04-05 -1748-04-14 -1748-04-25 -1748-08-09 -1749-06-27 -1749-10-24 -1750-04-20 -1750-04-27 -1750-05-29 -1750-07-04 -1750-08-01 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-19 -1750-11-22 -1750-12-23 -1750-12-26 -1751-03-01 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-08-22 -1751-12-04 -1751-12-07 -1751-12-25 -1752-03-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-24 -1752-06-06 -1752-08-14 -1752-12-19 -1752-12-19 -1752-12-19 -1752-12-19 -1752-12-19 -1753-03-01 -1753-03-17 -1753-04-12 -1753-07-10 -1753-07-31 -1753-08-26 -1753-09-09 -1753-10-16 -1753-11-23 -1753-11-26 -1753-12-01 -1753-12-01 -1753-12-01 -1753-12-01 -1753-12-01 -1753-12-01 -1754-04-01 -1754-04-21 -1754-05-29 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-07-20 -1754-08-20 -1754-09-04 -1754-12-03 -1755-01-08 -1755-01-11 -1755-02-22 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-05-13 -1755-07-25 -1755-07-25 -1755-07-25 -1755-08-30 -1755-09-03 -1755-09-21 -1755-11-19 -1755-12-17 -1756-08-20 -1756-10-24 -1756-11-03 -1757-02-22 -1757-08-07 -1757-09-17 -1757-10-20 -1757-11-10 -1758-05-14 -1758-05-17 -1758-08-11 -1759-01-22 -1759-02-19 -1759-03-05 -1759-03-05 -1759-03-12 -1759-03-20 -1759-04-27 -1759-05-08 -1759-08-02 -1759-08-10 -1759-09-25 
-1759-11-10 -1759-11-25 -1759-11-25 -1759-11-25 -1759-11-25 -1759-11-25 -1760-01-11 -1760-03-03 -1760-03-28 -1760-04-17 -1760-09-11 -1761-01-02 -1761-01-02 -1761-01-02 -1761-01-02 -1761-01-10 -1761-06-23 -1761-08-17 -1761-09-30 -1761-11-14 -1761-11-16 -1761-12-02 -1762-05-04 -1762-05-19 -1762-08-28 -1762-11-28 -1762-11-28 -1762-11-28 -1762-11-28 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-05-19 -1763-07-17 -1763-07-25 -1763-07-29 -1763-08-05 -1763-12-30 -1764-02-05 -1764-04-29 -1764-07-22 -1764-09-30 -1764-12-01 -1765-01-28 -1765-06-30 -1765-08-22 -1765-11-12 -1766-03-15 -1766-07-23 -1766-09-24 -1766-12-14 -1767-03-25 -1767-04-22 -1767-05-09 -1767-05-16 -1767-05-16 -1767-05-16 -1767-08-10 -1767-11-01 -1767-11-15 -1768-02-02 -1768-06-19 -1768-10-31 -1768-12-22 -1768-12-22 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-02-02 -1769-07-28 -1769-08-20 -1770-03-04 -1770-03-08 -1770-03-21 -1770-08-02 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-10-02 -1770-10-25 -1771-04-05 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-07-01 -1772-03-12 -1772-04-24 -1772-04-24 -1772-04-24 -1772-04-24 -1772-04-24 -1772-04-24 -1772-05-27 -1772-09-13 -1772-09-15 -1772-12-24 -1772-12-30 -1773-04-08 -1773-06-07 -1773-11-16 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-19 -1774-06-15 -1774-07-06 -1774-08-01 -1774-11-02 -1775-04-15 -1775-06-04 -1775-06-16 -1775-07-16 +1740-07-12 +1740-11-23 +1740-11-27 +1741-04-13 +1741-06-01 +1741-08-15 +1741-08-26 +1741-09-10 +1741-11-25 +1741-11-25 +1741-12-30 +1742-06-06 +1742-12-08 +1742-12-17 +1742-12-25 +1743-01-10 +1743-01-15 +1743-01-15 +1743-01-15 +1743-01-19 +1743-02-02 +1743-02-09 +1743-12-13 +1744-01-02 +1744-04-13 +1744-09-13 +1744-09-18 +1744-09-23 +1744-12-04 +1744-12-04 +1744-12-04 +1744-12-04 +1745-02-08 +1745-03-14 +1745-05-12 +1745-06-12 +1745-08-20 +1745-10-27 +1745-10-29 +1746-01-19 +1746-01-25 +1746-02-15 +1746-03-17 +1746-07-27 +1746-09-17 +1746-10-01 +1746-12-20 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-04-15 +1747-06-08 +1747-07-28 +1747-09-23 +1747-11-07 +1747-11-26 +1748-03-20 +1748-04-04 +1748-04-13 +1748-04-24 +1748-08-08 +1749-06-26 +1749-10-23 +1750-04-19 +1750-04-26 +1750-05-28 +1750-07-03 +1750-07-31 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-18 +1750-11-21 +1750-12-22 +1750-12-25 +1751-02-28 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-08-21 +1751-12-03 +1751-12-06 +1751-12-24 +1752-03-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-23 +1752-06-05 +1752-08-13 +1752-12-18 +1752-12-18 +1752-12-18 +1752-12-18 +1752-12-18 +1753-02-28 +1753-03-16 +1753-04-11 +1753-07-09 +1753-07-30 +1753-08-25 +1753-09-08 +1753-10-15 +1753-11-22 +1753-11-25 +1753-11-30 +1753-11-30 +1753-11-30 +1753-11-30 +1753-11-30 +1753-11-30 +1754-03-31 +1754-04-20 +1754-05-28 
+1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-07-19 +1754-08-19 +1754-09-03 +1754-12-02 +1755-01-07 +1755-01-10 +1755-02-21 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-05-12 +1755-07-24 +1755-07-24 +1755-07-24 +1755-08-29 +1755-09-02 +1755-09-20 +1755-11-18 +1755-12-16 +1756-08-19 +1756-10-23 +1756-11-02 +1757-02-21 +1757-08-06 +1757-09-16 +1757-10-19 +1757-11-09 +1758-05-13 +1758-05-16 +1758-08-10 +1759-01-21 +1759-02-18 +1759-03-04 +1759-03-04 +1759-03-11 +1759-03-19 +1759-04-26 +1759-05-07 +1759-08-01 +1759-08-09 +1759-09-24 +1759-11-09 +1759-11-24 +1759-11-24 +1759-11-24 +1759-11-24 +1759-11-24 +1760-01-10 +1760-03-02 +1760-03-27 +1760-04-16 +1760-09-10 +1761-01-01 +1761-01-01 +1761-01-01 +1761-01-01 +1761-01-09 +1761-06-22 +1761-08-16 +1761-09-29 +1761-11-13 +1761-11-15 +1761-12-01 +1762-05-03 +1762-05-18 +1762-08-27 +1762-11-27 +1762-11-27 +1762-11-27 +1762-11-27 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-05-18 +1763-07-16 +1763-07-24 +1763-07-28 +1763-08-04 +1763-12-29 +1764-02-04 +1764-04-28 +1764-07-21 +1764-09-29 +1764-11-30 +1765-01-27 +1765-06-29 +1765-08-21 +1765-11-11 +1766-03-14 +1766-07-22 +1766-09-23 +1766-12-13 +1767-03-24 +1767-04-21 +1767-05-08 +1767-05-15 +1767-05-15 +1767-05-15 +1767-08-09 +1767-10-31 +1767-11-14 +1768-02-01 +1768-06-18 +1768-10-30 +1768-12-21 +1768-12-21 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-02-01 +1769-07-27 +1769-08-19 +1770-03-03 +1770-03-07 +1770-03-20 +1770-08-01 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-10-01 +1770-10-24 +1771-04-04 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-06-30 +1772-03-11 +1772-04-23 +1772-04-23 +1772-04-23 +1772-04-23 +1772-04-23 +1772-04-23 +1772-05-26 +1772-09-12 +1772-09-14 +1772-12-23 +1772-12-29 +1773-04-07 +1773-06-06 +1773-11-15 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-18 +1774-06-14 +1774-07-05 +1774-07-31 +1774-11-01 +1775-04-14 +1775-06-03 +1775-06-15 +1775-07-15 +1776-01-28 1776-01-29 -1776-01-30 -1776-09-18 -1776-09-26 -1776-10-09 -1776-10-15 -1776-12-06 -1777-01-28 -1777-01-30 -1777-01-30 -1777-01-30 -1777-01-30 -1777-01-30 -1777-01-30 -1777-04-04 -1777-04-16 -1777-05-26 -1777-06-05 -1778-01-09 -1778-04-25 -1779-01-11 -1779-04-02 -1779-04-10 -1779-04-29 -1779-04-29 -1779-04-29 -1779-04-29 -1779-04-29 -1779-08-02 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1780-02-01 -1780-12-11 -1781-02-13 -1781-08-19 -1781-10-10 -1781-11-20 -1782-02-08 -1782-05-17 -1782-06-06 -1782-06-09 -1782-06-20 -1782-07-04 -1782-10-04 -1782-10-10 -1782-10-10 -1782-10-10 -1782-10-10 -1782-10-10 -1782-11-04 -1783-01-15 -1783-05-14 -1783-07-16 -1783-07-16 -1783-07-24 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-29 -1784-04-12 -1784-05-09 -1785-01-06 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 
-1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-05-22 -1785-06-09 -1785-06-19 -1785-08-29 -1785-09-30 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1786-03-19 -1786-06-08 -1786-08-06 -1786-08-29 -1786-09-23 -1786-09-29 -1786-10-06 -1787-01-28 -1787-02-23 -1787-04-24 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-31 -1787-05-31 -1787-05-31 -1787-05-31 -1787-05-31 -1787-05-31 -1787-06-28 -1787-07-07 -1787-08-02 -1787-10-06 -1787-10-19 -1787-10-24 -1787-11-11 -1787-12-19 +1776-09-17 +1776-09-25 +1776-10-08 +1776-10-14 +1776-12-05 +1777-01-27 +1777-01-29 +1777-01-29 +1777-01-29 +1777-01-29 +1777-01-29 +1777-01-29 +1777-04-03 +1777-04-15 +1777-05-25 +1777-06-04 +1778-01-08 +1778-04-24 +1779-01-10 +1779-04-01 +1779-04-09 +1779-04-28 +1779-04-28 +1779-04-28 +1779-04-28 +1779-04-28 +1779-08-01 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1780-01-31 +1780-12-10 +1781-02-12 +1781-08-18 +1781-10-09 +1781-11-19 +1782-02-07 +1782-05-16 +1782-06-05 +1782-06-08 +1782-06-19 +1782-07-03 +1782-10-03 +1782-10-09 +1782-10-09 +1782-10-09 +1782-10-09 +1782-10-09 +1782-11-03 +1783-01-14 +1783-05-13 +1783-07-15 +1783-07-15 +1783-07-23 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-28 +1784-04-11 +1784-05-08 +1785-01-05 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-05-21 +1785-06-08 +1785-06-18 +1785-08-28 +1785-09-29 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1786-03-18 +1786-06-07 +1786-08-05 +1786-08-28 +1786-09-22 +1786-09-28 +1786-10-05 +1787-01-27 +1787-02-22 +1787-04-23 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-30 +1787-05-30 +1787-05-30 +1787-05-30 +1787-05-30 +1787-05-30 +1787-06-27 +1787-07-06 +1787-08-01 +1787-10-05 +1787-10-18 +1787-10-23 +1787-11-10 +1787-12-18 +1788-04-05 1788-04-06 -1788-04-07 -1788-07-04 -1788-08-06 -1789-01-22 -1789-01-22 -1789-01-22 -1789-02-08 -1789-04-18 -1789-05-10 -1789-08-10 -1790-01-30 -1790-04-19 -1790-10-10 -1791-01-24 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-07-20 -1791-10-04 -1792-06-16 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-08-20 -1793-08-22 -1793-11-19 -1794-03-19 -1794-03-19 -1794-03-19 -1794-03-19 -1794-03-19 -1794-04-16 -1794-04-20 -1794-05-15 -1794-07-03 -1794-08-13 -1794-09-02 -1794-09-24 -1794-10-16 -1794-11-02 -1794-11-14 -1795-01-17 -1795-03-09 -1795-05-27 -1795-05-27 -1795-05-27 -1795-05-27 -1795-05-27 +1788-07-03 +1788-08-05 +1789-01-21 +1789-01-21 +1789-01-21 +1789-02-07 +1789-04-17 +1789-05-09 +1789-08-09 +1790-01-29 +1790-04-18 +1790-10-09 +1791-01-23 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-07-19 +1791-10-03 +1792-06-15 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-08-19 +1793-08-21 +1793-11-18 +1794-03-18 +1794-03-18 +1794-03-18 +1794-03-18 +1794-03-18 
+1794-04-15 +1794-04-19 +1794-05-14 +1794-07-02 +1794-08-12 +1794-09-01 +1794-09-23 +1794-10-15 +1794-11-01 +1794-11-13 +1795-01-16 +1795-03-08 +1795-05-26 +1795-05-26 +1795-05-26 +1795-05-26 +1795-05-26 +1797-01-03 1797-01-04 -1797-01-05 -1797-09-29 -1797-10-01 -1798-02-14 -1798-09-28 -1799-04-29 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-12-02 -1800-03-30 -1800-04-05 -1800-04-23 -1800-09-04 -1800-10-01 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-11-25 -1801-12-15 -1802-03-30 -1802-04-11 -1802-06-01 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-08-13 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-30 -1802-11-26 -1802-12-13 -1803-02-04 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-14 -1803-07-11 -1803-12-02 -1803-12-08 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-04 -1804-03-08 -1804-07-16 -1804-10-23 -1804-12-28 -1805-01-27 -1805-03-19 -1805-07-15 -1805-07-20 -1805-10-23 -1805-10-23 -1805-10-23 -1805-10-23 -1805-10-23 -1806-01-02 -1806-02-10 -1806-10-12 -1807-02-18 -1807-02-23 -1807-03-09 -1807-06-15 -1807-07-09 -1807-09-02 -1807-10-25 -1807-10-29 -1807-12-29 -1808-03-03 -1808-03-13 -1808-05-10 -1808-07-02 -1808-09-10 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-04-23 -1809-06-06 -1809-09-15 -1809-09-18 -1809-12-24 -1810-08-17 -1810-08-17 -1811-01-27 -1811-01-27 -1811-01-27 -1811-01-27 -1811-04-10 -1811-04-27 -1811-05-31 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-08-14 -1812-01-17 -1812-05-25 -1812-06-11 -1812-10-16 -1812-12-24 -1813-02-04 -1813-03-18 -1813-04-11 -1813-07-09 -1813-08-20 -1813-10-20 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-04-26 -1814-05-28 -1814-11-09 -1814-11-20 -1814-12-21 -1815-01-16 -1815-02-23 -1815-03-10 -1815-04-30 -1815-07-30 -1816-01-13 -1816-02-13 -1816-03-13 -1816-08-03 -1816-08-12 -1816-12-25 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-17 -1817-05-15 -1817-05-20 -1817-06-02 -1817-07-02 -1817-07-12 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 +1797-09-28 +1797-09-30 +1798-02-13 +1798-09-27 +1799-04-28 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-12-01 +1800-03-29 +1800-04-04 +1800-04-22 +1800-09-03 +1800-09-30 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-11-24 +1801-12-14 +1802-03-29 +1802-04-10 +1802-05-31 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-08-12 +1802-08-22 +1802-08-22 
+1802-08-22 +1802-08-22 +1802-08-22 +1802-08-22 +1802-08-29 +1802-11-25 +1802-12-12 +1803-02-03 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-13 +1803-07-10 +1803-12-01 +1803-12-07 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-03-03 +1804-03-07 +1804-07-15 +1804-10-22 +1804-12-27 +1805-01-26 +1805-03-18 +1805-07-14 +1805-07-19 +1805-10-22 +1805-10-22 +1805-10-22 +1805-10-22 +1805-10-22 +1806-01-01 +1806-02-09 +1806-10-11 +1807-02-17 +1807-02-22 +1807-03-08 +1807-06-14 +1807-07-08 +1807-09-01 +1807-10-24 +1807-10-28 +1807-12-28 +1808-03-02 +1808-03-12 +1808-05-09 +1808-07-01 +1808-09-09 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-04-22 +1809-06-05 +1809-09-14 +1809-09-17 +1809-12-23 +1810-08-16 +1810-08-16 +1811-01-26 +1811-01-26 +1811-01-26 +1811-01-26 +1811-04-09 +1811-04-26 +1811-05-30 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-08-13 +1812-01-16 +1812-05-24 +1812-06-10 +1812-10-15 +1812-12-23 +1813-02-03 +1813-03-17 +1813-04-10 +1813-07-08 +1813-08-19 +1813-10-19 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-04-25 +1814-05-27 +1814-11-08 +1814-11-19 +1814-12-20 +1815-01-15 +1815-02-22 +1815-03-09 +1815-04-29 +1815-07-29 +1816-01-12 +1816-02-12 +1816-03-12 +1816-08-02 +1816-08-11 +1816-12-24 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-16 +1817-05-14 +1817-05-19 +1817-06-01 +1817-07-01 +1817-07-11 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-10-14 1817-10-15 -1817-10-16 -1817-12-18 -1818-01-01 -1818-02-25 -1818-05-26 -1818-06-02 -1818-07-17 -1818-08-13 -1818-09-21 -1818-09-23 -1818-09-25 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-22 -1819-11-19 -1820-04-09 -1820-04-11 -1820-06-04 -1820-07-14 -1820-08-05 -1820-10-01 -1820-10-03 -1820-11-01 -1820-11-29 -1820-11-29 -1820-11-29 -1820-11-29 -1820-11-29 -1820-11-29 -1821-01-07 -1821-01-12 -1821-05-16 -1821-07-26 -1821-08-14 -1821-10-11 -1821-11-10 -1821-12-08 -1822-01-22 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-16 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-08-19 -1822-11-23 -1822-12-03 -1823-01-31 -1823-03-15 -1823-03-18 +1817-12-17 +1817-12-31 +1818-02-24 +1818-05-25 +1818-06-01 +1818-07-16 +1818-08-12 +1818-09-20 +1818-09-22 +1818-09-24 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-21 +1819-11-18 +1820-04-08 +1820-04-10 +1820-06-03 +1820-07-13 +1820-08-04 +1820-09-30 +1820-10-02 +1820-10-31 +1820-11-28 +1820-11-28 +1820-11-28 +1820-11-28 +1820-11-28 +1820-11-28 +1821-01-06 +1821-01-11 +1821-05-15 +1821-07-25 +1821-08-13 +1821-10-10 +1821-11-09 +1821-12-07 
+1822-01-21 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-15 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-08-18 +1822-11-22 +1822-12-02 +1823-01-30 +1823-03-14 +1823-03-17 +1823-05-12 1823-05-13 -1823-05-14 -1823-06-03 -1823-08-06 -1823-10-08 -1824-01-08 -1824-01-21 -1824-02-08 -1824-06-29 -1824-07-10 -1824-08-13 -1824-08-25 -1824-09-16 -1825-02-06 -1825-02-19 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-06-18 -1825-08-24 -1825-08-28 -1825-11-06 -1825-12-27 -1826-01-16 -1826-07-25 -1826-11-10 -1826-11-29 -1827-05-13 -1827-06-12 -1827-06-22 -1827-07-05 -1827-07-23 -1827-07-23 -1827-07-23 -1827-08-07 -1827-12-13 -1828-02-14 -1828-06-20 -1828-10-14 -1829-02-17 -1829-07-24 -1829-11-09 -1830-05-27 -1830-11-19 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1831-01-29 -1831-03-11 -1831-05-26 -1831-07-23 -1831-08-18 -1831-08-21 -1831-09-16 -1831-10-17 -1831-12-18 -1832-11-12 -1833-04-26 -1833-05-08 -1833-05-08 -1833-05-08 -1833-05-08 -1833-05-08 -1833-07-05 -1833-10-07 -1833-11-29 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-21 -1834-02-18 -1834-04-22 -1834-05-14 -1834-05-14 -1834-05-14 -1834-05-14 -1834-05-14 -1834-05-14 -1834-10-21 -1834-11-05 -1835-01-03 -1835-06-18 -1835-07-24 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-09-05 -1835-12-03 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-05-21 -1836-05-27 -1836-09-08 -1836-10-14 -1837-01-18 -1837-03-01 -1837-03-30 -1837-07-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-10-17 -1837-12-16 -1838-01-22 -1838-08-04 -1838-12-02 -1839-01-07 -1839-02-02 -1839-04-23 -1839-04-23 -1839-04-23 -1839-04-23 -1839-04-23 -1839-05-14 -1839-05-14 -1839-05-14 -1839-05-14 -1839-05-14 -1839-05-14 -1839-07-28 -1840-01-18 -1840-01-23 -1840-02-03 -1840-02-24 -1840-12-13 -1840-12-19 -1841-04-18 -1841-05-17 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-06-04 -1841-12-05 -1842-04-04 -1842-05-18 -1842-05-18 -1842-05-18 -1842-05-18 -1842-05-18 -1842-05-18 -1842-06-03 -1842-12-29 -1843-03-14 -1843-07-08 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-10-10 -1843-12-17 -1844-03-26 -1844-07-20 -1844-07-23 -1844-08-22 -1844-09-20 -1844-11-03 -1845-05-08 -1845-05-13 -1845-09-06 -1846-01-05 -1846-02-09 -1846-08-14 -1846-09-19 -1846-12-05 -1847-01-09 +1823-06-02 +1823-08-05 +1823-10-07 +1824-01-07 +1824-01-20 +1824-02-07 +1824-06-28 +1824-07-09 +1824-08-12 +1824-08-24 +1824-09-15 +1825-02-05 +1825-02-18 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-06-17 +1825-08-23 +1825-08-27 +1825-11-05 +1825-12-26 +1826-01-15 +1826-07-24 +1826-11-09 +1826-11-28 +1827-05-12 +1827-06-11 +1827-06-21 +1827-07-04 +1827-07-22 +1827-07-22 +1827-07-22 +1827-08-06 +1827-12-12 +1828-02-13 +1828-06-19 +1828-10-13 +1829-02-16 +1829-07-23 +1829-11-08 +1830-05-26 +1830-11-18 +1830-12-08 +1830-12-08 +1830-12-08 +1830-12-08 +1830-12-08 +1830-12-08 
+1830-12-08 +1831-01-28 +1831-03-10 +1831-05-25 +1831-07-22 +1831-08-17 +1831-08-20 +1831-09-15 +1831-10-16 +1831-12-17 +1832-11-11 +1833-04-25 +1833-05-07 +1833-05-07 +1833-05-07 +1833-05-07 +1833-05-07 +1833-07-04 +1833-10-06 +1833-11-28 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-20 +1834-02-17 +1834-04-21 +1834-05-13 +1834-05-13 +1834-05-13 +1834-05-13 +1834-05-13 +1834-05-13 +1834-10-20 +1834-11-04 +1835-01-02 +1835-06-17 +1835-07-23 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-09-04 +1835-12-02 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-05-20 +1836-05-26 +1836-09-07 +1836-10-13 +1837-01-17 +1837-02-28 +1837-03-29 +1837-07-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-10-16 +1837-12-15 +1838-01-21 +1838-08-03 +1838-12-01 +1839-01-06 +1839-02-01 +1839-04-22 +1839-04-22 +1839-04-22 +1839-04-22 +1839-04-22 +1839-05-13 +1839-05-13 +1839-05-13 +1839-05-13 +1839-05-13 +1839-05-13 +1839-07-27 +1840-01-17 +1840-01-22 +1840-02-02 +1840-02-23 +1840-12-12 +1840-12-18 +1841-04-17 +1841-05-16 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-06-03 +1841-12-04 +1842-04-03 +1842-05-17 +1842-05-17 +1842-05-17 +1842-05-17 +1842-05-17 +1842-05-17 +1842-06-02 +1842-12-28 +1843-03-13 +1843-07-07 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-10-09 +1843-12-16 +1844-03-25 +1844-07-19 +1844-07-22 +1844-08-21 +1844-09-19 +1844-11-02 +1845-05-07 +1845-05-12 +1845-09-05 +1846-01-04 +1846-02-08 +1846-08-13 +1846-09-18 +1846-12-04 +1847-01-08 +1847-02-24 1847-02-25 -1847-02-26 -1847-04-05 -1847-04-22 -1847-07-26 -1847-08-23 -1848-03-01 -1849-03-08 -1849-03-31 -1849-04-16 -1849-05-22 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-08-28 -1849-09-11 -1850-01-21 -1850-03-19 -1850-04-08 -1850-08-30 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-12-31 -1851-02-12 -1851-02-12 -1851-02-12 -1851-02-12 -1851-02-12 -1851-03-15 -1851-06-04 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-27 -1851-10-08 -1851-11-11 -1852-02-10 -1852-02-20 -1852-04-13 -1852-04-24 -1852-06-15 -1852-09-02 -1852-09-12 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-31 -1853-01-26 -1853-07-26 -1853-09-16 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-11-21 -1853-12-13 -1853-12-30 -1854-01-31 -1854-02-28 -1854-05-03 -1854-05-30 -1854-05-30 -1854-05-30 -1854-05-30 -1854-05-30 -1854-07-17 -1854-12-22 -1854-12-29 -1855-02-24 -1855-10-31 -1855-11-07 -1855-11-30 -1855-12-24 -1856-01-13 -1856-05-07 -1856-05-20 -1856-05-22 -1856-06-26 -1856-07-12 -1856-10-06 -1856-11-16 -1857-04-14 -1857-05-23 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-07-14 -1857-08-14 -1857-10-19 -1858-02-15 -1858-02-24 -1858-07-04 -1858-07-15 -1858-10-25 -1858-10-28 -1859-01-18 
-1859-03-08 -1859-03-20 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-04-10 -1859-05-23 -1859-08-31 -1859-09-17 -1859-09-17 -1859-09-17 -1859-11-21 -1859-12-31 -1860-03-10 -1860-03-12 -1860-05-15 -1860-08-22 -1860-09-19 -1860-12-03 -1861-04-23 -1861-08-14 -1861-12-06 -1861-12-19 -1862-01-12 -1862-03-01 -1862-03-20 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-06-26 -1863-02-22 -1863-06-17 -1863-09-12 -1863-12-27 -1863-12-29 -1864-02-24 -1864-07-19 -1864-08-12 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-12-04 -1865-12-17 -1866-01-04 -1866-01-10 -1866-03-29 -1866-04-07 -1866-04-14 -1866-09-03 -1866-10-21 -1867-05-11 -1867-06-21 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-09-03 -1867-09-26 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-27 -1867-11-10 -1867-11-12 -1867-11-12 -1867-11-12 -1867-11-12 -1867-11-12 -1867-11-25 -1869-01-21 -1869-01-27 -1869-02-18 -1869-03-09 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-12-03 -1870-01-17 -1870-07-27 -1870-08-21 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-21 -1870-10-08 -1870-11-19 -1871-01-22 -1871-01-27 -1871-02-14 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-03-31 -1871-07-29 -1871-08-01 -1871-08-03 -1871-08-17 -1872-02-16 -1872-05-28 -1872-06-06 -1872-07-26 -1872-10-09 -1873-02-27 -1873-05-29 -1873-07-02 -1873-07-12 -1873-07-15 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-27 -1873-07-30 -1873-09-06 -1873-09-06 -1873-09-06 -1873-09-06 -1873-12-23 -1874-01-02 -1874-04-04 -1874-04-14 -1874-07-09 -1874-07-21 -1874-09-24 -1874-10-24 -1874-11-26 -1874-12-16 -1875-03-28 -1875-04-24 -1875-05-12 -1875-07-18 -1875-08-16 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-11-02 -1875-11-27 -1876-08-16 -1876-10-18 -1876-12-15 -1876-12-28 -1877-03-02 -1877-03-06 -1877-03-19 -1877-06-22 -1877-07-17 -1877-09-01 -1877-10-04 -1878-01-07 -1878-02-02 -1878-04-11 -1878-04-30 -1878-06-26 -1878-10-17 -1878-10-21 -1878-11-17 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-03-10 -1879-07-23 -1879-09-16 -1879-12-02 -1880-05-30 +1847-04-04 +1847-04-21 +1847-07-25 +1847-08-22 +1848-02-29 +1849-03-07 +1849-03-30 +1849-04-15 +1849-05-21 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-08-27 +1849-09-10 +1850-01-20 +1850-03-18 +1850-04-07 +1850-08-29 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-12-30 +1851-02-11 +1851-02-11 +1851-02-11 +1851-02-11 +1851-02-11 +1851-03-14 +1851-06-03 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-26 +1851-10-07 +1851-11-10 +1852-02-09 +1852-02-19 +1852-04-12 +1852-04-23 +1852-06-14 +1852-09-01 +1852-09-11 
+1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-30 +1853-01-25 +1853-07-25 +1853-09-15 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-11-20 +1853-12-12 +1853-12-29 +1854-01-30 +1854-02-27 +1854-05-02 +1854-05-29 +1854-05-29 +1854-05-29 +1854-05-29 +1854-05-29 +1854-07-16 +1854-12-21 +1854-12-28 +1855-02-23 +1855-10-30 +1855-11-06 +1855-11-29 +1855-12-23 +1856-01-12 +1856-05-06 +1856-05-19 +1856-05-21 +1856-06-25 +1856-07-11 +1856-10-05 +1856-11-15 +1857-04-13 +1857-05-22 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-07-13 +1857-08-13 +1857-10-18 +1858-02-14 +1858-02-23 +1858-07-03 +1858-07-14 +1858-10-24 +1858-10-27 +1859-01-17 +1859-03-07 +1859-03-19 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-04-09 +1859-05-22 +1859-08-30 +1859-09-16 +1859-09-16 +1859-09-16 +1859-11-20 +1859-12-30 +1860-03-09 +1860-03-11 +1860-05-14 +1860-08-21 +1860-09-18 +1860-12-02 +1861-04-22 +1861-08-13 +1861-12-05 +1861-12-18 +1862-01-11 +1862-02-28 +1862-03-19 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-06-25 +1863-02-21 +1863-06-16 +1863-09-11 +1863-12-26 +1863-12-28 +1864-02-23 +1864-07-18 +1864-08-11 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-12-03 +1865-12-16 +1866-01-03 +1866-01-09 +1866-03-28 +1866-04-06 +1866-04-13 +1866-09-02 +1866-10-20 +1867-05-10 +1867-06-20 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-09-02 +1867-09-25 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-10-26 +1867-11-09 +1867-11-11 +1867-11-11 +1867-11-11 +1867-11-11 +1867-11-11 +1867-11-24 +1869-01-20 +1869-01-26 +1869-02-17 +1869-03-08 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-12-02 +1870-01-16 +1870-07-26 +1870-08-20 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-20 +1870-10-07 +1870-11-18 +1871-01-21 +1871-01-26 +1871-02-13 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-03-30 +1871-07-28 +1871-07-31 +1871-08-02 +1871-08-16 +1872-02-15 +1872-05-27 +1872-06-05 +1872-07-25 +1872-10-08 +1873-02-26 +1873-05-28 +1873-07-01 +1873-07-11 +1873-07-14 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-26 +1873-07-29 +1873-09-05 +1873-09-05 +1873-09-05 +1873-09-05 +1873-12-22 +1874-01-01 +1874-04-03 +1874-04-13 +1874-07-08 +1874-07-20 +1874-09-23 +1874-10-23 +1874-11-25 +1874-12-15 +1875-03-27 +1875-04-23 +1875-05-11 +1875-07-17 +1875-08-15 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-11-01 +1875-11-26 +1876-08-15 +1876-10-17 +1876-12-14 +1876-12-27 +1877-03-01 +1877-03-05 +1877-03-18 +1877-06-21 +1877-07-16 +1877-08-31 +1877-10-03 +1878-01-06 +1878-02-01 +1878-04-10 +1878-04-29 
+1878-06-25 +1878-10-16 +1878-10-20 +1878-11-16 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-03-09 +1879-07-22 +1879-09-15 +1879-12-01 +1880-05-29 +1880-06-29 1880-06-30 -1880-07-01 -1880-11-03 -1880-11-30 -1880-12-07 -1881-01-23 -1881-01-27 -1881-05-03 -1881-05-15 -1881-06-23 -1881-07-12 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-09-13 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-14 -1882-05-01 -1882-05-25 -1882-07-22 -1882-11-09 -1883-04-11 -1883-05-26 -1883-06-01 -1883-10-14 -1883-10-20 -1883-10-29 -1883-10-29 -1883-10-29 -1883-10-29 -1883-10-29 -1884-08-05 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-09-23 -1884-11-05 -1884-11-20 -1884-12-15 -1885-01-03 -1885-01-22 -1885-02-20 -1885-05-25 -1885-06-21 -1885-08-08 -1886-03-21 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-07 -1886-04-25 -1886-06-01 -1886-07-25 -1886-11-10 -1886-12-02 -1887-01-16 -1887-06-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-10-26 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-12-08 -1888-01-15 -1888-02-11 -1888-08-08 -1888-11-03 -1888-11-15 -1889-03-10 -1889-06-06 -1889-06-13 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-09-20 -1890-02-24 -1890-04-29 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-09-15 +1880-11-02 +1880-11-29 +1880-12-06 +1881-01-22 +1881-01-26 +1881-05-02 +1881-05-14 +1881-06-22 +1881-07-11 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-09-12 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-13 +1882-04-30 +1882-05-24 +1882-07-21 +1882-11-08 +1883-04-10 +1883-05-25 +1883-05-31 +1883-10-13 +1883-10-19 +1883-10-28 +1883-10-28 +1883-10-28 +1883-10-28 +1883-10-28 +1884-08-04 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-09-22 +1884-11-04 +1884-11-19 +1884-12-14 +1885-01-02 +1885-01-21 +1885-02-19 +1885-05-24 +1885-06-20 +1885-08-07 +1886-03-20 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-06 +1886-04-24 +1886-05-31 +1886-07-24 +1886-11-09 +1886-12-01 +1887-01-15 +1887-06-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-10-25 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-12-07 +1888-01-14 +1888-02-10 +1888-08-07 +1888-11-02 +1888-11-14 +1889-03-09 +1889-06-05 +1889-06-12 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-09-19 +1890-02-23 +1890-04-28 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-09-14 +1890-10-04 1890-10-05 -1890-10-06 -1890-11-10 -1891-01-10 -1891-01-13 -1891-06-20 -1891-07-13 -1892-03-26 -1892-05-13 -1892-09-10 -1893-01-18 -1893-07-10 +1890-11-09 +1891-01-09 +1891-01-12 +1891-06-19 +1891-07-12 +1892-03-25 +1892-05-12 +1892-09-09 +1893-01-17 +1893-07-09 +1893-07-18 +1893-07-18 +1893-07-18 
+1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-20 -1894-06-05 -1894-06-18 -1894-10-01 -1894-10-10 -1894-11-28 -1895-01-20 -1895-02-07 -1895-09-04 -1895-11-01 -1895-12-13 -1895-12-31 -1896-01-02 -1896-01-13 -1896-01-26 -1896-02-26 -1896-03-09 -1896-05-05 -1896-05-10 -1896-08-08 -1896-08-14 -1896-08-25 -1897-02-21 -1897-06-09 -1897-06-12 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-11-03 -1897-12-01 -1898-02-23 -1898-02-27 -1898-03-06 -1898-04-13 -1898-05-22 -1898-06-21 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1899-01-30 -1899-02-16 -1899-03-01 -1899-06-24 -1899-08-27 -1899-10-20 -1900-04-27 -1900-09-17 -1900-10-19 -1901-02-10 -1901-04-13 -1901-05-12 -1901-05-12 -1901-05-12 -1901-05-12 -1901-05-29 -1901-10-05 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1902-01-24 -1902-05-14 -1902-06-14 -1902-07-29 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-17 -1903-03-20 -1903-04-23 -1903-05-12 -1904-05-24 -1904-06-30 -1904-08-20 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-10-14 -1904-11-30 -1905-03-27 -1905-06-07 -1905-06-15 -1905-07-20 -1905-09-05 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-23 -1905-12-15 -1906-01-11 -1906-07-25 -1906-08-27 -1906-09-02 -1906-11-02 -1906-12-13 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1908-01-24 -1908-03-28 -1908-05-03 -1908-05-28 -1908-06-27 -1908-06-29 -1908-12-19 -1909-11-21 -1910-01-23 -1910-02-16 -1910-03-05 -1910-03-15 -1910-04-06 -1910-05-12 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-06-18 -1910-08-17 -1910-11-06 -1911-05-05 -1911-06-22 -1911-11-19 -1912-04-14 -1912-05-02 -1912-06-12 -1913-01-30 -1913-02-12 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-07-14 -1913-09-26 -1913-09-26 -1913-09-26 -1913-10-24 -1913-12-15 -1914-02-18 -1914-08-19 -1915-02-05 -1915-03-05 -1915-08-10 -1915-08-15 -1915-11-05 -1915-12-12 -1915-12-18 -1916-05-05 -1916-05-12 -1916-06-07 -1916-06-11 +1894-06-04 +1894-06-17 +1894-09-30 +1894-10-09 +1894-11-27 +1895-01-19 +1895-02-06 +1895-09-03 +1895-10-31 +1895-12-12 +1895-12-30 +1896-01-01 +1896-01-12 +1896-01-25 +1896-02-25 +1896-03-08 +1896-05-04 +1896-05-09 +1896-08-07 +1896-08-13 +1896-08-24 +1897-02-20 +1897-06-08 +1897-06-11 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-11-02 +1897-11-30 +1898-02-22 +1898-02-26 +1898-03-05 +1898-04-12 +1898-05-21 +1898-06-20 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1899-01-29 +1899-02-15 +1899-02-28 +1899-06-23 +1899-08-26 +1899-10-19 +1900-04-26 +1900-09-16 +1900-10-18 +1901-02-09 +1901-04-12 +1901-05-11 +1901-05-11 +1901-05-11 +1901-05-11 +1901-05-28 +1901-10-04 +1901-12-22 +1901-12-22 
+1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1902-01-23 +1902-05-13 +1902-06-13 +1902-07-28 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-16 +1903-03-19 +1903-04-22 +1903-05-11 +1904-05-23 +1904-06-29 +1904-08-19 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-10-13 +1904-11-29 +1905-03-26 +1905-06-06 +1905-06-14 +1905-07-19 +1905-09-04 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-22 +1905-12-14 +1906-01-10 +1906-07-24 +1906-08-26 +1906-09-01 +1906-11-01 +1906-12-12 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1908-01-23 +1908-03-27 +1908-05-02 +1908-05-27 +1908-06-26 +1908-06-28 +1908-12-18 +1909-11-20 +1910-01-22 +1910-02-15 +1910-03-04 +1910-03-14 +1910-04-05 +1910-05-11 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-06-17 +1910-08-16 +1910-11-05 +1911-05-04 +1911-06-21 +1911-11-18 +1912-04-13 +1912-05-01 +1912-06-11 +1913-01-29 +1913-02-11 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-07-13 +1913-09-25 +1913-09-25 +1913-09-25 +1913-10-23 +1913-12-14 +1914-02-17 +1914-08-18 +1915-02-04 +1915-03-04 +1915-08-09 +1915-08-14 +1915-11-04 +1915-12-11 +1915-12-17 +1916-05-04 +1916-05-11 +1916-06-06 +1916-06-10 +1916-08-08 1916-08-09 -1916-08-10 -1917-04-16 -1917-06-28 -1917-12-07 -1918-02-10 -1918-02-10 -1918-02-10 -1918-02-10 -1918-02-10 -1918-02-10 -1918-09-11 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1919-02-07 -1919-04-07 -1919-06-26 -1919-08-22 -1919-10-04 -1919-10-04 -1919-10-21 -1920-01-05 -1920-05-06 -1920-06-30 -1920-08-04 -1920-08-18 -1920-10-19 -1921-02-19 -1921-03-14 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-06-02 -1921-07-03 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-11-04 -1921-11-04 -1921-11-04 -1921-11-04 -1921-11-04 -1921-11-04 -1922-01-15 -1922-04-20 -1922-06-22 -1922-07-22 -1923-01-24 -1923-03-08 -1923-03-24 -1923-05-29 -1923-08-12 -1923-08-31 -1923-09-20 -1923-11-15 -1923-12-16 -1924-01-26 -1924-03-16 -1924-05-06 -1924-06-23 -1924-07-04 -1924-11-22 -1924-12-10 -1925-02-17 -1925-06-05 -1925-09-05 -1925-09-08 -1925-10-23 -1925-12-30 -1926-03-30 -1926-04-10 -1926-05-27 -1926-09-07 -1926-12-07 -1927-02-11 -1927-03-27 -1927-04-05 -1927-04-05 -1927-04-05 -1927-04-05 -1927-04-05 -1927-04-05 -1927-05-16 -1927-07-16 -1927-07-26 -1927-08-26 -1927-09-03 -1927-11-01 -1927-11-28 -1928-01-10 -1928-02-25 -1928-05-11 -1928-07-29 -1928-08-27 -1929-03-07 -1929-04-01 -1929-04-05 -1929-05-29 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1930-02-01 -1930-04-09 -1930-04-09 -1930-04-09 -1930-04-09 -1930-06-11 -1930-07-07 -1930-09-28 -1930-12-21 -1931-04-08 -1931-07-01 -1931-08-30 -1931-10-31 -1931-10-31 -1931-12-12 -1932-03-15 -1932-03-30 -1932-04-21 -1932-05-31 -1932-10-27 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-08-27 -1933-08-30 -1933-11-30 -1933-12-22 -1934-06-02 -1934-08-13 -1934-09-12 -1934-11-10 
-1934-11-10 -1934-11-10 -1934-11-10 -1934-11-10 -1934-11-10 -1935-04-11 -1936-01-31 -1936-06-23 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-11-04 -1937-01-22 -1937-02-04 +1917-04-15 +1917-06-27 +1917-12-06 +1918-02-09 +1918-02-09 +1918-02-09 +1918-02-09 +1918-02-09 +1918-02-09 +1918-09-10 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1919-02-06 +1919-04-06 +1919-06-25 +1919-08-21 +1919-10-03 +1919-10-03 +1919-10-20 +1920-01-04 +1920-05-05 +1920-06-29 +1920-08-03 +1920-08-17 +1920-10-18 +1921-02-18 +1921-03-13 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-06-01 +1921-07-02 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-11-03 +1921-11-03 +1921-11-03 +1921-11-03 +1921-11-03 +1921-11-03 +1922-01-14 +1922-04-19 +1922-06-21 +1922-07-21 +1923-01-23 +1923-03-07 +1923-03-23 +1923-05-28 +1923-08-11 +1923-08-30 +1923-09-19 +1923-11-14 +1923-12-15 +1924-01-25 +1924-03-15 +1924-05-05 +1924-06-22 +1924-07-03 +1924-11-21 +1924-12-09 +1925-02-16 +1925-06-04 +1925-09-04 +1925-09-07 +1925-10-22 +1925-12-29 +1926-03-29 +1926-04-09 +1926-05-26 +1926-09-06 +1926-12-06 +1927-02-10 +1927-03-26 +1927-04-04 +1927-04-04 +1927-04-04 +1927-04-04 +1927-04-04 +1927-04-04 +1927-05-15 +1927-07-15 +1927-07-25 +1927-08-25 +1927-09-02 +1927-10-31 +1927-11-27 +1928-01-09 +1928-02-24 +1928-05-10 +1928-07-28 +1928-08-26 +1929-03-06 +1929-03-31 +1929-04-04 +1929-05-28 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1930-01-31 +1930-04-08 +1930-04-08 +1930-04-08 +1930-04-08 +1930-06-10 +1930-07-06 +1930-09-27 +1930-12-20 +1931-04-07 +1931-06-30 +1931-08-29 +1931-10-30 +1931-10-30 +1931-12-11 +1932-03-14 +1932-03-29 +1932-04-20 +1932-05-30 +1932-10-26 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-08-26 +1933-08-29 +1933-11-29 +1933-12-21 +1934-06-01 +1934-08-12 +1934-09-11 +1934-11-09 +1934-11-09 +1934-11-09 +1934-11-09 +1934-11-09 +1934-11-09 +1935-04-10 +1936-01-30 +1936-06-22 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-11-03 +1937-01-21 +1937-02-03 +1937-05-29 1937-05-30 -1937-05-31 -1937-06-26 -1937-09-08 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-17 -1938-01-27 +1937-06-25 +1937-09-07 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-16 +1938-01-26 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-26 -1938-05-17 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-10-08 -1939-01-24 -1939-02-19 -1939-03-23 -1939-05-05 -1939-07-13 -1939-08-04 +1938-05-16 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-10-07 +1939-01-23 +1939-02-18 +1939-03-22 +1939-05-04 +1939-07-12 +1939-08-03 +1940-02-09 1940-02-10 -1940-02-11 -1940-03-27 -1940-04-28 -1940-05-01 -1940-08-07 -1940-08-18 -1940-09-24 -1941-03-24 -1941-04-19 -1941-09-16 -1941-09-16 
-1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-11-07 -1942-02-16 -1942-03-23 -1943-01-31 -1943-03-30 -1943-05-08 -1943-11-28 -1944-02-28 -1944-05-06 -1945-09-26 -1945-10-08 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1946-01-18 -1946-02-02 -1946-02-04 -1946-04-29 -1946-05-11 -1947-01-05 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-03-03 -1947-03-26 -1947-05-17 -1947-06-25 -1947-10-11 -1947-12-28 -1948-01-27 -1948-05-20 -1948-06-17 -1948-12-27 -1949-06-26 -1949-09-05 -1949-09-20 -1950-01-23 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-23 -1950-04-10 -1950-09-27 -1951-03-31 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-09-06 -1952-02-06 -1952-02-13 -1952-06-23 -1953-01-10 -1953-02-10 -1953-02-23 -1953-02-23 -1953-02-23 -1953-02-23 -1953-02-23 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-05-19 -1953-07-23 -1953-11-27 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-25 -1954-02-21 -1954-05-19 -1954-07-04 -1954-07-07 -1954-10-17 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-12-25 -1955-05-10 -1955-07-22 -1955-08-07 -1955-08-24 -1955-09-23 -1955-11-23 -1956-05-18 -1956-08-06 -1956-10-09 -1957-04-20 -1957-05-03 -1957-08-15 -1957-08-24 -1957-09-05 -1958-07-25 -1958-07-25 -1958-07-25 -1958-07-25 -1958-07-25 -1958-10-13 -1958-10-24 -1959-01-13 -1959-01-24 -1959-03-22 -1959-04-30 -1959-09-08 -1959-09-23 -1959-11-23 -1959-12-21 -1960-01-16 -1960-03-18 -1960-04-05 -1960-07-17 -1960-07-25 -1960-08-30 -1960-11-25 -1961-05-15 -1961-07-10 -1961-07-29 -1961-07-29 -1961-09-25 -1961-10-15 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1962-06-07 -1962-07-08 -1962-08-18 -1962-09-02 -1963-01-08 -1963-03-31 +1940-03-26 +1940-04-27 +1940-04-30 +1940-08-06 +1940-08-17 +1940-09-23 +1941-03-23 +1941-04-18 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-11-06 +1942-02-15 +1942-03-22 +1943-01-30 +1943-03-29 +1943-05-07 +1943-11-27 +1944-02-27 +1944-05-05 +1945-09-25 +1945-10-07 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1946-01-17 +1946-02-01 +1946-02-03 +1946-04-28 +1946-05-10 +1947-01-04 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-03-02 +1947-03-25 +1947-05-16 +1947-06-24 +1947-10-10 +1947-12-27 +1948-01-26 +1948-05-19 +1948-06-16 +1948-12-26 +1949-06-25 +1949-09-04 +1949-09-19 +1950-01-22 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-22 +1950-04-09 +1950-09-26 +1951-03-30 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-09-05 +1952-02-05 +1952-02-12 +1952-06-22 +1953-01-09 +1953-02-09 +1953-02-22 +1953-02-22 +1953-02-22 +1953-02-22 +1953-02-22 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 
+1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-05-18 +1953-07-22 +1953-11-26 +1953-12-03 +1953-12-03 +1953-12-03 +1953-12-03 +1953-12-03 +1953-12-03 +1953-12-03 +1953-12-24 +1954-02-20 +1954-05-18 +1954-07-03 +1954-07-06 +1954-10-16 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-12-24 +1955-05-09 +1955-07-21 +1955-08-06 +1955-08-23 +1955-09-22 +1955-11-22 +1956-05-17 +1956-08-05 +1956-10-08 +1957-04-19 +1957-05-02 +1957-08-14 +1957-08-23 +1957-09-04 +1958-07-24 +1958-07-24 +1958-07-24 +1958-07-24 +1958-07-24 +1958-10-12 +1958-10-23 +1959-01-12 +1959-01-23 +1959-03-21 +1959-04-29 +1959-09-07 +1959-09-22 +1959-11-22 +1959-12-20 +1960-01-15 +1960-03-17 +1960-04-04 +1960-07-16 +1960-07-24 +1960-08-29 +1960-11-24 +1961-05-14 +1961-07-09 +1961-07-28 +1961-07-28 +1961-09-24 +1961-10-14 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1962-06-06 +1962-07-07 +1962-08-17 +1962-09-01 +1963-01-07 +1963-03-30 +1964-04-14 1964-04-15 -1964-04-16 -1964-10-20 -1964-10-30 -1964-11-08 -1964-12-14 -1965-03-20 -1965-03-27 -1965-05-04 -1965-06-01 -1965-09-19 -1966-02-15 -1966-08-17 -1966-12-01 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1967-03-22 -1967-04-09 -1967-08-09 -1967-09-05 -1968-02-16 +1964-10-19 +1964-10-29 +1964-11-07 +1964-12-13 +1965-03-19 +1965-03-26 +1965-05-03 +1965-05-31 +1965-09-18 +1966-02-14 +1966-08-16 +1966-11-30 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1967-03-21 +1967-04-08 +1967-08-08 +1967-09-04 +1968-02-15 1971-09-22 1971-09-25 1972-02-25 Index: ql/src/test/results/clientpositive/str_to_map.q.java1.7.out =================================================================== --- ql/src/test/results/clientpositive/str_to_map.q.java1.7.out (revision 1673556) +++ ql/src/test/results/clientpositive/str_to_map.q.java1.7.out (working copy) @@ -31,10 +31,10 @@ Select Operator expressions: str_to_map('a=1,b=2,c=3',',','=')['a'] (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 @@ -66,10 +66,10 @@ Select Operator expressions: str_to_map('a:1,b:2,c:3') (type: map) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 460000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 377000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 2262 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select str_to_map('a:1,b:2,c:3') from src limit 3 @@ -101,10 +101,10 @@ Select Operator expressions: str_to_map('a:1,b:2,c:3',',',':') (type: map) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 460000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 377000 Basic stats: COMPLETE Column stats: 
COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 2262 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 Index: ql/src/test/results/clientpositive/subquery_in.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_in.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/subquery_in.q.out (working copy) @@ -254,36 +254,32 @@ TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: int + output shape: _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 - partition by: _col0 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col1 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) @@ -293,7 +289,7 @@ predicate: (_wcol0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int) + expressions: _col5 (type: int) outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -438,46 +434,42 @@ TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: int + output shape: _col2: string, _col5: 
int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 - partition by: _col0 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col1 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col0 is not null) (type: boolean) + predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: int) + expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Group By Operator Index: ql/src/test/results/clientpositive/subquery_in_having.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_in_having.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/subquery_in_having.q.out (working copy) @@ -1332,37 +1332,33 @@ TableScan alias: part_subq Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: p_name (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int + output shape: _col1: string, _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col2 - partition by: _col1 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col0 + arguments: _col1 name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(MAX)~ Index: ql/src/test/results/clientpositive/subquery_notin.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_notin.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/subquery_notin.q.out (working copy) @@ -285,7 +285,7 @@ 199 val_199 199 val_199 2 val_2 -Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle 
Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- non agg, corr explain select p_mfgr, b.p_name, p_size @@ -321,44 +321,40 @@ TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: p_name (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int + output shape: _col1: string, _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col2 - partition by: _col1 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col2 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and (_col0 is null or _col1 is null)) (type: boolean) + predicate: ((_wcol0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE @@ -480,37 +476,33 @@ TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: p_name (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + 
outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int + output shape: _col1: string, _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col2 - partition by: _col1 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col2 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) @@ -520,7 +512,7 @@ predicate: (_wcol0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -536,7 +528,7 @@ Processor Tree: ListSink -Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select p_mfgr, b.p_name, p_size from part b where b.p_name not in @@ -575,7 +567,7 @@ Manufacturer#5 almond antique blue firebrick mint 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -Warning: Shuffle Join JOIN[45][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- agg, non corr explain select p_name, p_size @@ -612,36 +604,32 @@ TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: int + output shape: _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 - partition by: _col0 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col1 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) @@ -651,7 +639,7 @@ predicate: (_wcol0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE 
Column stats: NONE Select Operator - expressions: _col1 (type: int) + expressions: _col5 (type: int) outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -763,36 +751,32 @@ TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: int + output shape: _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 - partition by: _col0 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col1 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) @@ -802,7 +786,7 @@ predicate: (_wcol0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int) + expressions: _col5 (type: int) outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -859,7 +843,7 @@ Processor Tree: ListSink -Warning: Shuffle Join JOIN[45][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[43][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select p_name, p_size from part where part.p_size not in @@ -906,7 +890,7 @@ almond aquamarine yellow dodger mint 7 almond azure aquamarine papaya violet 12 almond azure blanched chiffon midnight 23 -Warning: Shuffle Join JOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[40][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- agg, corr explain select p_mfgr, p_name, p_size @@ -942,36 +926,32 @@ TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column 
stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: int + output shape: _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 - partition by: _col0 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col1 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) @@ -981,7 +961,7 @@ predicate: (_wcol0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: int) + expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1137,36 +1117,32 @@ TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: int + output shape: _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 - partition by: _col0 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col1 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) @@ -1176,7 +1152,7 @@ predicate: (_wcol0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: int) + expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1226,7 +1202,7 @@ Processor Tree: ListSink -Warning: Shuffle Join JOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[40][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select p_mfgr, p_name, p_size from part b where b.p_size not in (select min(p_size) @@ -1267,7 +1243,7 @@ Manufacturer#5 almond azure blanched chiffon 
midnight 23 Manufacturer#5 almond antique blue firebrick mint 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- non agg, non corr, Group By in Parent Query select li.l_partkey, count(*) from lineitem li @@ -1466,7 +1442,7 @@ POSTHOOK: Input: default@t1_v POSTHOOK: Output: database:default POSTHOOK: Output: default@T2_v -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) @@ -1611,7 +1587,7 @@ Processor Tree: ListSink -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out =================================================================== --- ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out (working copy) @@ -207,46 +207,42 @@ TableScan alias: part2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: p2_mfgr (type: string), p2_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p2_mfgr (type: string), p2_size (type: int) + sort order: ++ + Map-reduce partition columns: p2_mfgr (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2, _col5 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: int + output shape: _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 - partition by: _col0 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col1 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col0 is not null) (type: boolean) + predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: int) + expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 
Basic stats: NONE Column stats: NONE Group By Operator @@ -379,46 +375,42 @@ TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: int + output shape: _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 - partition by: _col0 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col1 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col0 is not null) (type: boolean) + predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: int) + expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -789,7 +781,7 @@ Processor Tree: ListSink -Warning: Shuffle Join JOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- non agg, corr explain select p_mfgr, b.p_name, p_size @@ -825,44 +817,40 @@ TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: p_name (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: VALUE._col1 (type: string), 
KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int + output shape: _col1: string, _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col2 - partition by: _col1 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col2 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and (_col0 is null or _col1 is null)) (type: boolean) + predicate: ((_wcol0 <= 2) and (_col1 is null or _col2 is null)) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE @@ -984,37 +972,33 @@ TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: p_name (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: int + output shape: _col1: string, _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col2 - partition by: _col1 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col2 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) @@ -1024,7 +1008,7 @@ predicate: (_wcol0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/subquery_views.q.out 
=================================================================== --- ql/src/test/results/clientpositive/subquery_views.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/subquery_views.q.out (working copy) @@ -69,8 +69,8 @@ POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@cv2 -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[46][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product PREHOOK: query: explain select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11') @@ -378,8 +378,8 @@ Processor Tree: ListSink -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[46][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product PREHOOK: query: select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11') PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/table_access_keys_stats.q.out =================================================================== --- ql/src/test/results/clientpositive/table_access_keys_stats.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/table_access_keys_stats.q.out (working copy) @@ -22,7 +22,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_4 +Operator:GBY_3 Table:default@t1 Keys:key @@ -35,7 +35,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_4 +Operator:GBY_3 Table:default@t1 Keys:key,val @@ -50,7 +50,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_4 +Operator:GBY_3 Table:default@t1 Keys:key @@ -63,7 +63,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_4 +Operator:GBY_3 Table:default@t1 Keys:key @@ -77,7 +77,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_4 +Operator:GBY_3 Table:default@t1 Keys:key @@ -90,7 +90,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_4 +Operator:GBY_3 Table:default@t1 Keys:key,val @@ -104,7 +104,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_4 +Operator:GBY_3 Table:default@t1 Keys:key,val @@ -130,7 +130,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_4 +Operator:GBY_3 Table:default@t1 Keys:key @@ -148,11 +148,11 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_4 +Operator:GBY_3 Table:default@t1 Keys:key -Operator:GBY_12 +Operator:GBY_10 Table:default@t1 Keys:key @@ -218,7 +218,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### -Operator:GBY_5 +Operator:GBY_3 Table:default@t1 Keys:key,val @@ -242,7 +242,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### 
-Operator:GBY_5 +Operator:GBY_3 Table:default@t1 Keys:key,val Index: ql/src/test/results/clientpositive/tez/explainuser_1.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/explainuser_1.q.out (revision 0) +++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out (working copy) @@ -0,0 +1,8846 @@ +PREHOOK: query: explain create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +POSTHOOK: type: CREATETABLE +Stage-0 + Create Table Operator: + columns:["key int","value string"] + input format:org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + name:default.src_orc_merge_test_part + output format:org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition columns:["ds string","ts string"] +PREHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: create table src_orc_merge_test_part(key int, value string) partitioned by (ds string, ts string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_orc_merge_test_part +PREHOOK: query: alter table src_orc_merge_test_part add partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: alter table src_orc_merge_test_part add partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +PREHOOK: query: desc extended src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: query: desc extended src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_orc_merge_test_part +key int +value string +ds string +ts string + +# Partition Information +# col_name data_type comment + +ds string +ts string + +#### A masked pattern was here #### +PREHOOK: query: explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +POSTHOOK: type: QUERY +Plan optimized by CBO. 
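Note on the new golden file that begins here: it records Hive's condensed, user-level EXPLAIN format for Tez, and each plan is headed by either "Plan optimized by CBO." or "Plan not optimized by CBO." depending on whether the cost-based optimizer handled the query. As a rough sketch only — the session setting below is an assumption and is not recorded anywhere in this patch — output of this shape is what EXPLAIN prints once the user-level formatter is enabled:

  -- assumed session setting; not taken from this file
  set hive.explain.user=true;
  -- this statement appears verbatim in the golden output above
  explain insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src;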
+ +Stage-3 + Stats-Aggr Operator + Stage-0 + Move Operator + partition:{"ts":"2012-01-03+14:46:31","ds":"2012-01-03"} + table:{"serde:":"org.apache.hadoop.hive.ql.io.orc.OrcSerde","name:":"default.src_orc_merge_test_part","input format:":"org.apache.hadoop.hive.ql.io.orc.OrcInputFormat","output format:":"org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"} + Stage-2 + Dependency Collection{} + Stage-1 + Map 1 + File Output Operator [FS_3] + compressed:false + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.ql.io.orc.OrcSerde","name:":"default.src_orc_merge_test_part","input format:":"org.apache.hadoop.hive.ql.io.orc.OrcInputFormat","output format:":"org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"} + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +POSTHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_orc_merge_test_part PARTITION(ds=2012-01-03,ts=2012-01-03+14:46:31).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain insert into table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src limit 100 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-3 + Stats-Aggr Operator + Stage-0 + Move Operator + partition:{"ts":"2012-01-03+14:46:31","ds":"2012-01-03"} + table:{"serde:":"org.apache.hadoop.hive.ql.io.orc.OrcSerde","name:":"default.src_orc_merge_test_part","input format:":"org.apache.hadoop.hive.ql.io.orc.OrcInputFormat","output format:":"org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 2 + File Output Operator [FS_7] + compressed:false + Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.ql.io.orc.OrcSerde","name:":"default.src_orc_merge_test_part","input format:":"org.apache.hadoop.hive.ql.io.orc.OrcInputFormat","output format:":"org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"} + Select Operator [SEL_6] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Limit [LIM_5] + Number of rows:100 + Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_4] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + sort order: + Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: string), _col1 (type: string) + Limit [LIM_2] + Number of rows:100 + Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select count(1) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31' +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_8] + compressed:false + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_6] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_4] + aggregations:["count(1)"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_2] + Statistics:Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:src_orc_merge_test_part + Statistics:Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select sum(hash(key)), sum(hash(value)) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31' +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(hash(key)), sum(hash(value)) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31' +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_8] + compressed:false + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_6] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + sort order: + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint), _col1 (type: bigint) + Group By Operator [GBY_4] + aggregations:["sum(_col0)","sum(_col1)"] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_2] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:src_orc_merge_test_part + Statistics:Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: alter table src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@src_orc_merge_test_part +PREHOOK: Output: default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +POSTHOOK: query: alter table src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: Output: 
default@src_orc_merge_test_part@ds=2012-01-03/ts=2012-01-03+14%3A46%3A31 +PREHOOK: query: explain select count(1) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31' +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_8] + compressed:false + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_6] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_4] + aggregations:["count(1)"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_2] + Statistics:Num rows: 0 Data size: 2527 Basic stats: PARTIAL Column stats: NONE + TableScan [TS_0] + alias:src_orc_merge_test_part + Statistics:Num rows: 0 Data size: 2527 Basic stats: PARTIAL Column stats: NONE +PREHOOK: query: explain select sum(hash(key)), sum(hash(value)) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31' +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(hash(key)), sum(hash(value)) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31' +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_8] + compressed:false + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_6] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + sort order: + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint), _col1 (type: bigint) + Group By Operator [GBY_4] + aggregations:["sum(_col0)","sum(_col1)"] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_2] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 24 Data size: 2527 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:src_orc_merge_test_part + Statistics:Num rows: 24 Data size: 2527 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: drop table src_orc_merge_test_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_orc_merge_test_part +PREHOOK: Output: default@src_orc_merge_test_part +POSTHOOK: query: drop table src_orc_merge_test_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_orc_merge_test_part +POSTHOOK: Output: default@src_orc_merge_test_part +Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +select src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (select * FROM src WHERE src.key < 10) src1 + JOIN + (select * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +) a +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +select src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (select * FROM src WHERE src.key < 10) src1 + JOIN + (select * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +) a +POSTHOOK: type: QUERY +Plan not optimized by CBO. 
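A short gloss on the cross-product warning just above: the inner JOIN in that query is written without an ON condition, so the planner has no join keys and falls back to a cross product, which is exactly what the warning flags. A minimal sketch of the pattern, assuming the same src table used throughout this output:

  -- no ON clause on the JOIN, so every row of src1 pairs with every row of src2;
  -- Hive reports this as a cross product in the plan warnings
  select count(*)
  from (select * from src where src.key < 10) src1
  join (select * from src where src.key < 10) src2;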
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Map 1 <- Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_17] + compressed:false + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_15] + | aggregations:["sum(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_14] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_13] + aggregations:["sum(hash(_col0,_col1,_col2,_col3))"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_11] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order:++++ + Statistics:Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_20] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{} + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_7] + | sort order: + | Statistics:Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col1 (type: string) + | Select Operator [SEL_5] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_19] + | predicate:(key < 10) (type: boolean) + | Statistics:Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_3] + | alias:src + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_2] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_18] + predicate:(key < 10) (type: boolean) + Statistics:Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_7] + compressed:false + Statistics:Num rows: 10 Data size: 885 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_6] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 10 Data size: 885 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_5] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_3] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select x, y, count(*) from (select key, (c_int+c_float+1+2) as x, sum(c_int) as y from cbo_t1 group by c_float, cbo_t1.c_int, key) R group by y, x +PREHOOK: type: QUERY +POSTHOOK: query: explain select x, y, count(*) from (select key, (c_int+c_float+1+2) as x, sum(c_int) as y from cbo_t1 group by c_float, cbo_t1.c_int, key) R group by y, x +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_12] + compressed:false + Statistics:Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_11] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_10] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: bigint), KEY._col1 (type: float) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_9] + key expressions:_col0 (type: bigint), _col1 (type: float) + Map-reduce partition columns:_col0 (type: bigint), _col1 (type: float) + sort order:++ + Statistics:Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col2 (type: bigint) + Group By Operator [GBY_8] + aggregations:["count()"] + keys:_col0 (type: bigint), _col1 (type: float) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_6] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_5] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_3] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key order by a) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key order by q/10 desc, r asc) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c order by cbo_t3.c_int+c desc, c +PREHOOK: type: QUERY +POSTHOOK: query: explain select cbo_t3.c_int, c, 
count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key order by a) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key order by q/10 desc, r asc) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c order by cbo_t3.c_int+c desc, c +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 10 <- Map 9 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_47] + compressed:false + Statistics:Num rows: 6 Data size: 494 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_46] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 6 Data size: 494 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_45] + key expressions:(UDFToLong(_col0) + _col1) (type: bigint), _col1 (type: bigint) + sort order:-+ + Statistics:Num rows: 6 Data size: 494 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: int), _col2 (type: bigint) + Group By Operator [GBY_43] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 6 Data size: 494 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_42] + key expressions:_col0 (type: int), _col1 (type: bigint) + Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) + sort order:++ + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: bigint) + Group By Operator [GBY_41] + aggregations:["count()"] + keys:_col0 (type: int), _col1 (type: bigint) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_37] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_53] + predicate:((_col3 > 0) or (_col1 >= 0)) (type: boolean) + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_59] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col3","_col4"] + | Statistics:Num rows: 19 Data size: 1566 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_33] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | value 
expressions:_col1 (type: int) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_54] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t3 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_35] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: bigint) + Select Operator [SEL_29] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_55] + predicate:((_col3 + _col1) >= 0) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_58] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 11 [SIMPLE_EDGE] + | Reduce Output Operator [RS_27] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: bigint) + | Select Operator [SEL_22] + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Reducer 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_21] + | key expressions:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: bigint) + | Select Operator [SEL_20] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_19] + | | aggregations:["sum(VALUE._col0)"] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 9 [SIMPLE_EDGE] + | Reduce Output Operator [RS_18] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col3 (type: bigint) + | Group By Operator [GBY_17] + | aggregations:["sum(_col1)"] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_15] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_57] + | predicate:((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and key is 
not null) (type: boolean) + | Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_13] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_25] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_11] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col3 (type: double), _col2 (type: bigint) + sort order:-+ + Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string), _col1 (type: int) + Select Operator [SEL_9] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_8] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_6] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_4] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_56] + predicate:((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) (type: boolean) + Statistics:Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_2] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc) cbo_t1 left outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p left outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int % c asc, cbo_t3.c_int desc +PREHOOK: type: QUERY +POSTHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 
as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc) cbo_t1 left outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p left outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int % c asc, cbo_t3.c_int desc +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 + File Output Operator [FS_42] + compressed:false + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_41] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_40] + key expressions:(UDFToLong(_col0) % _col1) (type: bigint), _col0 (type: int) + sort order:+- + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions:_col1 (type: bigint), _col2 (type: bigint) + Group By Operator [GBY_38] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_37] + key expressions:_col0 (type: int), _col1 (type: bigint) + Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) + sort order:++ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions:_col2 (type: bigint) + Group By Operator [GBY_36] + aggregations:["count()"] + keys:_col0 (type: int), _col1 (type: bigint) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator [SEL_34] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator [FIL_33] + predicate:(((_col1 > 0) or (_col6 >= 0)) and (((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0))) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Merge Join Operator [MERGEJOIN_52] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col6"] + | Statistics:Num rows: 5 Data size: 391 Basic stats: COMPLETE Column stats: NONE + |<-Map 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_31] + | key 
expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 5 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_27] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 5 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_50] + | predicate:((c_int > 0) and key is not null) (type: boolean) + | Statistics:Num rows: 5 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_25] + | alias:cbo_t3 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_29] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions:_col1 (type: int), _col2 (type: bigint) + Select Operator [SEL_22] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator [FIL_47] + predicate:((_col3 + _col1) >= 0) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Merge Join Operator [MERGEJOIN_51] + | condition map:[{"":"Right Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_19] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_7] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Group By Operator [GBY_6] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Group By Operator [GBY_4] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Filter Operator [FIL_48] + | predicate:(((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t2 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_20] + key expressions:_col0 (type: string) 
+ Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: bigint) + Select Operator [SEL_17] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_16] + key expressions:_col3 (type: bigint), _col1 (type: int) + sort order:+- + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col0 (type: string), _col2 (type: bigint) + Select Operator [SEL_15] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator [GBY_14] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + |<-Map 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_13] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_12] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator [SEL_10] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_49] + predicate:(((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + TableScan [TS_8] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b+c, a desc) cbo_t1 right outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 2) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c +PREHOOK: type: QUERY +POSTHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b+c, a desc) cbo_t1 right outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and 
(cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 2) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 5 + File Output Operator [FS_35] + compressed:false + Statistics:Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_33] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_32] + key expressions:_col0 (type: int), _col1 (type: bigint) + Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) + sort order:++ + Statistics:Num rows: 4 Data size: 320 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: bigint) + Group By Operator [GBY_31] + aggregations:["count()"] + keys:_col0 (type: int), _col1 (type: bigint) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 4 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_29] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 4 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_39] + predicate:(((_col1 + _col4) >= 2) and ((_col1 > 0) or (_col6 >= 0))) (type: boolean) + Statistics:Num rows: 4 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_44] + | condition map:[{"":"Right Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col4","_col6"] + | Statistics:Num rows: 22 Data size: 1762 Basic stats: COMPLETE Column stats: NONE + |<-Map 9 [SIMPLE_EDGE] + | Reduce Output Operator [RS_26] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_24] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_23] + | alias:cbo_t3 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_25] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions:_col1 (type: int), _col2 (type: bigint), _col4 (type: int) + Select Operator [SEL_22] + outputColumnNames:["_col0","_col1","_col2","_col4"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE + Merge Join Operator [MERGEJOIN_43] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_19] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_7] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Group By Operator [GBY_6] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Group By Operator [GBY_4] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Filter Operator [FIL_41] + | predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t2 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_20] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: bigint) + Select Operator [SEL_17] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + |<-Reducer 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_16] + key expressions:_col3 (type: bigint), _col0 (type: string) + sort order:+- + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: bigint) + Select Operator [SEL_15] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator [GBY_14] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + |<-Map 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_13] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: 
string) + sort order:+++ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_12] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator [SEL_10] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_42] + predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + TableScan [TS_8] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by c+a desc) cbo_t1 full outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by p+q desc, r asc) cbo_t2 on cbo_t1.a=p full outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int +PREHOOK: type: QUERY +POSTHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by c+a desc) cbo_t1 full outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by p+q desc, r asc) cbo_t2 on cbo_t1.a=p full outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 7 + File Output Operator [FS_41] + compressed:false + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_40] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_39] + key expressions:_col0 (type: int) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions:_col1 (type: bigint), _col2 (type: bigint) + Group By Operator [GBY_37] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_36] + key expressions:_col0 (type: int), _col1 (type: bigint) + Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) + sort order:++ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions:_col2 (type: bigint) + Group By Operator [GBY_35] + aggregations:["count()"] + keys:_col0 (type: int), _col1 (type: bigint) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator [SEL_33] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator [FIL_44] + predicate:(((((_col1 + _col4) >= 0) and ((_col1 > 0) or (_col6 >= 0))) and ((_col6 >= 1) or (_col2 >= 1))) and ((UDFToLong(_col6) + _col2) >= 0)) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Merge Join Operator [MERGEJOIN_50] + | condition map:[{"":"Right Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col4","_col6"] + | Statistics:Num rows: 6 Data size: 489 Basic stats: COMPLETE Column stats: NONE + |<-Map 11 [SIMPLE_EDGE] + | Reduce Output Operator [RS_30] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_28] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 6 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_48] + | predicate:(c_int > 0) (type: boolean) + | Statistics:Num rows: 6 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_26] + | alias:cbo_t3 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_29] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 
(type: string) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions:_col1 (type: int), _col2 (type: bigint), _col4 (type: int) + Select Operator [SEL_25] + outputColumnNames:["_col0","_col1","_col2","_col4"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Merge Join Operator [MERGEJOIN_49] + | condition map:[{"":"Outer Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Reducer 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_23] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: bigint) + | Select Operator [SEL_20] + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | |<-Reducer 9 [SIMPLE_EDGE] + | Reduce Output Operator [RS_19] + | key expressions:_col3 (type: double) + | sort order:- + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col1 (type: int), _col2 (type: bigint) + | Select Operator [SEL_18] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Group By Operator [GBY_17] + | | aggregations:["sum(VALUE._col0)"] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | |<-Map 8 [SIMPLE_EDGE] + | Reduce Output Operator [RS_16] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | value expressions:_col3 (type: bigint) + | Group By Operator [GBY_15] + | aggregations:["sum(_col1)"] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Select Operator [SEL_13] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Filter Operator [FIL_47] + | predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | TableScan [TS_11] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_22] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_9] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + |<-Reducer 2 
[SIMPLE_EDGE] + Reduce Output Operator [RS_8] + key expressions:_col3 (type: double), _col2 (type: bigint) + sort order:-+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col0 (type: string), _col1 (type: int) + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator [GBY_6] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_4] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator [SEL_2] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_46] + predicate:((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + TableScan [TS_0] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c +PREHOOK: type: QUERY +POSTHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_39] + compressed:false + Statistics:Num rows: 6 Data size: 494 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_37] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 6 Data size: 494 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_36] + key expressions:_col0 (type: int), _col1 (type: bigint) + Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) + sort order:++ + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: bigint) + Group By Operator [GBY_35] + aggregations:["count()"] + keys:_col0 (type: int), _col1 (type: bigint) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_31] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_45] + predicate:((_col3 > 0) or (_col1 >= 0)) (type: boolean) + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_51] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col3","_col4"] + | Statistics:Num rows: 19 Data size: 1566 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_27] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_46] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t3 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_29] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions:_col1 (type: int), _col2 (type: bigint) + Select Operator [SEL_23] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator [FIL_47] + predicate:((_col3 + _col1) >= 0) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Merge Join Operator [MERGEJOIN_50] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: 
string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Reducer 5 [SIMPLE_EDGE] + | Reduce Output Operator [RS_19] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_9] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Group By Operator [GBY_8] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | |<-Map 4 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Group By Operator [GBY_6] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Select Operator [SEL_4] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Filter Operator [FIL_48] + | predicate:(((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | TableScan [TS_2] + | alias:cbo_t2 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_21] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: bigint) + Select Operator [SEL_17] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator [GBY_16] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + |<-Map 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_15] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_14] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator [SEL_12] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
COMPLETE + Filter Operator [FIL_49] + predicate:(((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + TableScan [TS_10] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc +PREHOOK: type: QUERY +POSTHOOK: query: explain select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_8] + compressed:false + Statistics:Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_6] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_5] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_3] + aggregations:["count(1)"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_1] + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select unionsrc.key FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc order by unionsrc.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select unionsrc.key FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc order by unionsrc.key +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 4 <- Union 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_29] + compressed:false + Statistics:Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_28] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Union 3 [SIMPLE_EDGE] + |<-Reducer 2 [CONTAINS] + | Reduce Output Operator [RS_27] + | key expressions:_col0 (type: string) + | sort order:+ + | Select Operator [SEL_6] + | outputColumnNames:["_col0"] + | Group By Operator [GBY_5] + | | aggregations:["count(VALUE._col0)"] + | | outputColumnNames:["_col0"] + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_4] + | sort order: + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: bigint) + | Group By Operator [GBY_3] + | aggregations:["count(_col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_1] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:s1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 6 [CONTAINS] + | Reduce Output Operator [RS_27] + | key expressions:_col0 (type: string) + | sort order:+ + | Select Operator [SEL_14] + | outputColumnNames:["_col0"] + | Group By Operator [GBY_13] + | | aggregations:["count(VALUE._col0)"] + | | outputColumnNames:["_col0"] + | |<-Map 5 [SIMPLE_EDGE] + | Reduce Output Operator [RS_12] + | sort order: + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: bigint) + | Group By Operator [GBY_11] + | aggregations:["count(_col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_9] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_8] + | alias:s1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 8 [CONTAINS] + Reduce Output Operator [RS_27] + key expressions:_col0 (type: string) + sort order:+ + Select Operator [SEL_24] + outputColumnNames:["_col0"] + Group By Operator [GBY_23] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + |<-Map 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_22] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_21] + aggregations:["count(_col0)"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_19] + outputColumnNames:["_col0"] + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_18] + alias:s1 + Statistics:Num rows: 20 Data size: 262 Basic 
stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc group by unionsrc.key order by unionsrc.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 + UNION ALL + select 'min' as key, min(c_int) as value from cbo_t3 s2 + UNION ALL + select 'avg' as key, avg(c_int) as value from cbo_t3 s3) unionsrc group by unionsrc.key order by unionsrc.key +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 3 (CONTAINS) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 5 + File Output Operator [FS_34] + compressed:false + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_33] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_32] + key expressions:_col0 (type: string) + sort order:+ + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: bigint) + Group By Operator [GBY_30] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Union 3 [SIMPLE_EDGE] + |<-Reducer 2 [CONTAINS] + | Reduce Output Operator [RS_29] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | value expressions:_col1 (type: bigint) + | Group By Operator [GBY_28] + | aggregations:["count(1)"] + | keys:_col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_6] + | outputColumnNames:["_col0"] + | Group By Operator [GBY_5] + | | aggregations:["count(VALUE._col0)"] + | | outputColumnNames:["_col0"] + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_4] + | sort order: + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: bigint) + | Group By Operator [GBY_3] + | aggregations:["count(_col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_1] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:s1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 7 [CONTAINS] + | Reduce Output Operator [RS_29] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | value expressions:_col1 (type: bigint) + | Group By Operator [GBY_28] + | aggregations:["count(1)"] + | keys:_col0 (type: string) + | 
outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_14] + | outputColumnNames:["_col0"] + | Group By Operator [GBY_13] + | | aggregations:["count(VALUE._col0)"] + | | outputColumnNames:["_col0"] + | |<-Map 6 [SIMPLE_EDGE] + | Reduce Output Operator [RS_12] + | sort order: + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: bigint) + | Group By Operator [GBY_11] + | aggregations:["count(_col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_9] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_8] + | alias:s1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 9 [CONTAINS] + Reduce Output Operator [RS_29] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + value expressions:_col1 (type: bigint) + Group By Operator [GBY_28] + aggregations:["count(1)"] + keys:_col0 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_24] + outputColumnNames:["_col0"] + Group By Operator [GBY_23] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + |<-Map 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_22] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_21] + aggregations:["count(_col0)"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_19] + outputColumnNames:["_col0"] + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_18] + alias:s1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select cbo_t1.key from cbo_t1 join cbo_t3 where cbo_t1.key=cbo_t3.key and cbo_t1.key >= 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select cbo_t1.key from cbo_t1 join cbo_t3 where cbo_t1.key=cbo_t3.key and cbo_t1.key >= 1 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_12] + compressed:false + Statistics:Num rows: 18 Data size: 1530 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_17] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0"] + | Statistics:Num rows: 18 Data size: 1530 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_15] + | predicate:(UDFToDouble(key) >= 1.0) (type: boolean) + | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_9] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_5] + outputColumnNames:["_col0"] + Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_16] + predicate:(UDFToDouble(key) >= 1.0) (type: boolean) + Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_3] + alias:cbo_t3 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 left outer join cbo_t2 on cbo_t1.key=cbo_t2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 left outer join cbo_t2 on cbo_t1.key=cbo_t2.key +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_8] + compressed:false + Statistics:Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_11] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col3"] + | Statistics:Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_4] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_3] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_2] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_8] + compressed:false + Statistics:Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_9] + | condition map:[{"":"Outer Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col3"] + | Statistics:Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_4] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_3] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_2] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key +PREHOOK: type: QUERY +POSTHOOK: query: explain select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + compressed:false + Statistics:Num rows: 291 Data size: 29391 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_13] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + Statistics:Num rows: 291 Data size: 29391 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_24] + | condition map:[{"":"Inner Join 0 to 1"},{"":"Inner Join 0 to 2"}] + | keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col4","_col5","_col6"] + | Statistics:Num rows: 291 Data size: 29391 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 18 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: float) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 18 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_21] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 18 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_9] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_22] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_2] + | alias:cbo_t3 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_11] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_23] + predicate:key is not null (type: boolean) + Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_4] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.key=p join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on 
cbo_t1.key=a +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.key=p join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on cbo_t1.key=a +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + compressed:false + Statistics:Num rows: 291 Data size: 51798 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_13] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 291 Data size: 51798 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_24] + | condition map:[{"":"Inner Join 0 to 1"},{"":"Inner Join 0 to 2"}] + | keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1","_col3","_col4"] + | Statistics:Num rows: 291 Data size: 51798 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_21] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_9] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_3] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_22] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_2] + | alias:cbo_t3 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_11] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_23] + predicate:key is not null (type: boolean) + Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_4] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE 
+PREHOOK: query: explain select * from (select q, b, cbo_t2.p, cbo_t1.c, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q == 2) and (b > 0 or c_int >= 0)) R where (q + 1 = 2) and (R.b > 0 or c_int >= 0) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from (select q, b, cbo_t2.p, cbo_t1.c, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q == 2) and (b > 0 or c_int >= 0)) R where (q + 1 = 2) and (R.b > 0 or c_int >= 0) +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_20] + compressed:false + Statistics:Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_19] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + Statistics:Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_18] + predicate:((_col1 > 0) or (_col6 >= 0)) (type: boolean) + Statistics:Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_29] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3","_col4","_col6"] + | Statistics:Num rows: 14 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 5 [SIMPLE_EDGE] + | Reduce Output Operator [RS_16] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_12] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_27] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_11] + | alias:cbo_t3 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_14] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 4 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: float), _col3 (type: string), _col4 (type: int) + Filter Operator [FIL_24] + predicate:((((_col1 + _col4) = 2) and ((_col4 + 1) = 2)) and 
_col0 is not null) (type: boolean) + Statistics:Num rows: 4 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_28] + | condition map:[{"":"Outer Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 18 Data size: 3276 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_6] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: float) + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 6 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_25] + | predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) (type: boolean) + | Statistics:Num rows: 6 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 6 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 6 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_26] + predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) (type: boolean) + Statistics:Num rows: 6 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_3] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select * from (select q, b, cbo_t2.p, cbo_t1.c, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 right outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q == 2) and (b > 0 or c_int >= 0)) R where (q + 1 = 2) and (R.b > 0 or c_int >= 0) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from (select q, b, cbo_t2.p, cbo_t1.c, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 right outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q == 2) and (b > 0 or c_int >= 0)) R where (q + 1 = 2) and (R.b > 0 or c_int >= 0) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + compressed:false + Statistics:Num rows: 12 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_13] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + Statistics:Num rows: 12 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_17] + predicate:((((_col1 + _col4) = 2) and ((_col1 > 0) or (_col6 >= 0))) and ((_col4 + 1) = 2)) (type: boolean) + Statistics:Num rows: 12 Data size: 1212 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_20] + | condition map:[{"":"Right Outer Join0 to 1"},{"":"Right Outer Join0 to 2"}] + | keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3","_col4","_col6"] + | Statistics:Num rows: 72 Data size: 7272 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_8] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: float) + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 6 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_18] + | predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) (type: boolean) + | Statistics:Num rows: 6 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_9] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_5] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 6 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_19] + | predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) (type: boolean) + | Statistics:Num rows: 6 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_3] + | alias:cbo_t2 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_6] + alias:cbo_t3 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key order 
by x limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key order by x limit 1 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:1 + Stage-1 + Reducer 3 + File Output Operator [FS_10] + compressed:false + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Limit [LIM_9] + Number of rows:1 + Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_8] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 10 Data size: 885 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col1 (type: int) + sort order:+ + Statistics:Num rows: 10 Data size: 885 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string), _col2 (type: bigint) + Select Operator [SEL_6] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 10 Data size: 885 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_5] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_3] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select x, y, count(*) from (select key, (c_int+c_float+1+2) as x, sum(c_int) as y from cbo_t1 group by c_float, cbo_t1.c_int, key) R group by y, x order by x,y limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select x, y, count(*) from (select key, (c_int+c_float+1+2) as x, sum(c_int) as y from cbo_t1 group by c_float, cbo_t1.c_int, key) R group by y, x order by x,y limit 1 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:1 + Stage-1 + Reducer 4 + File Output Operator [FS_15] + compressed:false + Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Limit [LIM_14] + Number of rows:1 + Statistics:Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_13] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] + key expressions:_col0 (type: float), _col1 (type: bigint) + sort order:++ + Statistics:Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col2 (type: bigint) + Select Operator [SEL_11] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_10] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: bigint), KEY._col1 (type: float) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_9] + key expressions:_col0 (type: bigint), _col1 (type: float) + Map-reduce partition columns:_col0 (type: bigint), _col1 (type: float) + sort order:++ + Statistics:Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col2 (type: bigint) + Group By Operator [GBY_8] + aggregations:["count()"] + keys:_col0 (type: bigint), _col1 (type: float) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_6] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_5] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_3] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 10 Data size: 917 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select key from(select key from (select key from cbo_t1 limit 5)cbo_t2 limit 5)cbo_t3 
limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from(select key from (select key from cbo_t1 limit 5)cbo_t2 limit 5)cbo_t3 limit 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 + File Output Operator [FS_13] + compressed:false + Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Limit [LIM_12] + Number of rows:5 + Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Limit [LIM_10] + Number of rows:5 + Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_9] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + sort order: + Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string) + Limit [LIM_7] + Number of rows:5 + Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Limit [LIM_5] + Number of rows:5 + Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_4] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + sort order: + Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string) + Limit [LIM_2] + Number of rows:5 + Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_1] + outputColumnNames:["_col0"] + Statistics:Num rows: 20 Data size: 1530 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select key, c_int from(select key, c_int from (select key, c_int from cbo_t1 order by c_int limit 5)cbo_t1 order by c_int limit 5)cbo_t2 order by c_int limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, c_int from(select key, c_int from (select key, c_int from cbo_t1 order by c_int limit 5)cbo_t1 order by c_int limit 5)cbo_t2 order by c_int limit 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 + File Output Operator [FS_13] + compressed:false + Statistics:Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Limit [LIM_12] + Number of rows:5 + Statistics:Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_11] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col1 (type: int) + sort order:+ + Statistics:Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col0 (type: string) + Limit [LIM_8] + Number of rows:5 + Statistics:Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_7] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_6] + key expressions:_col1 (type: int) + sort order:+ + Statistics:Num rows: 5 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string) + Limit [LIM_4] + Number of rows:5 + Statistics:Num rows: 5 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_2] + key expressions:_col1 (type: int) + sort order:+ + Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string) + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 20 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key order by a limit 5) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key order by q/10 desc, r asc limit 5) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c order by cbo_t3.c_int+c desc, c limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key order by a limit 5) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key order by q/10 desc, r asc limit 5) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and 
(b > 0 or c_int >= 0) group by cbo_t3.c_int, c order by cbo_t3.c_int+c desc, c limit 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 10 <- Map 9 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 + File Output Operator [FS_50] + compressed:false + Statistics:Num rows: 5 Data size: 410 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Limit [LIM_49] + Number of rows:5 + Statistics:Num rows: 5 Data size: 410 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_48] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 6 Data size: 494 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_47] + key expressions:(UDFToLong(_col0) + _col1) (type: bigint), _col1 (type: bigint) + sort order:-+ + Statistics:Num rows: 6 Data size: 494 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: int), _col2 (type: bigint) + Group By Operator [GBY_45] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: int), KEY._col1 (type: bigint) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 6 Data size: 494 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_44] + key expressions:_col0 (type: int), _col1 (type: bigint) + Map-reduce partition columns:_col0 (type: int), _col1 (type: bigint) + sort order:++ + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: bigint) + Group By Operator [GBY_43] + aggregations:["count()"] + keys:_col0 (type: int), _col1 (type: bigint) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_39] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_56] + predicate:((_col3 > 0) or (_col1 >= 0)) (type: boolean) + Statistics:Num rows: 12 Data size: 989 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_64] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col3","_col4"] + | Statistics:Num rows: 19 Data size: 1566 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_35] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_57] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 18 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | 
alias:cbo_t3 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_37] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col1 (type: int), _col2 (type: bigint) + Select Operator [SEL_31] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_58] + predicate:((_col3 + _col1) >= 0) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_63] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 11 [SIMPLE_EDGE] + | Reduce Output Operator [RS_29] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: bigint) + | Filter Operator [FIL_61] + | predicate:_col0 is not null (type: boolean) + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | Limit [LIM_24] + | Number of rows:5 + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_23] + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Reducer 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_22] + | key expressions:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: bigint) + | Select Operator [SEL_21] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_20] + | | aggregations:["sum(VALUE._col0)"] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 9 [SIMPLE_EDGE] + | Reduce Output Operator [RS_19] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col3 (type: bigint) + | Group By Operator [GBY_18] + | aggregations:["sum(_col1)"] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_16] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_62] + | predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) (type: boolean) + | Statistics:Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: 
COMPLETE + | TableScan [TS_14] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_27] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: int) + Filter Operator [FIL_59] + predicate:_col0 is not null (type: boolean) + Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Limit [LIM_12] + Number of rows:5 + Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_11] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col3 (type: double), _col2 (type: bigint) + sort order:-+ + Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string), _col1 (type: int) + Select Operator [SEL_9] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_8] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_6] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_4] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_60] + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) (type: boolean) + Statistics:Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_2] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select cbo_t1.c_int from cbo_t1 left semi join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) +PREHOOK: type: QUERY +POSTHOOK: query: explain select cbo_t1.c_int from cbo_t1 left semi join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_13] + compressed:false + Statistics:Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_12] + outputColumnNames:["_col0"] + Statistics:Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_18] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + | Statistics:Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_8] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 5 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int) + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 5 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_16] + | predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) (type: boolean) + | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_6] + keys:_col0 (type: string) + outputColumnNames:["_col0"] + Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_4] + outputColumnNames:["_col0"] + Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_17] + predicate:key is not null (type: boolean) + Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_3] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select * from (select c, b, a from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 left semi join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p left semi join cbo_t3 on cbo_t1.a=key where (b + 1 == 2) and (b > 0 or c >= 0)) R where (b + 1 = 2) and (R.b > 0 or c >= 0) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from (select c, b, a from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 left semi join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p left semi join cbo_t3 on cbo_t1.a=key where (b + 1 == 2) and (b > 0 or c >= 0)) R 
where (b + 1 = 2) and (R.b > 0 or c >= 0) +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_20] + compressed:false + Statistics:Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_19] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_30] + | condition map:[{"":"Left Semi Join 0 to 1"},{"":"Left Semi Join 0 to 2"}] + | keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_13] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: int), _col2 (type: float) + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_27] + | predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) (type: boolean) + | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_15] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_9] + | keys:_col0 (type: string) + | outputColumnNames:["_col0"] + | Statistics:Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_5] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 5 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_28] + | predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) (type: boolean) + | Statistics:Num rows: 5 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_3] + | alias:cbo_t2 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_17] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_11] + keys:_col0 (type: string) + outputColumnNames:["_col0"] + Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_7] + outputColumnNames:["_col0"] + Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_29] + predicate:key is not null (type: boolean) + Statistics:Num rows: 18 Data size: 1360 
Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_6] + alias:cbo_t3 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select a, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by a+b desc, c asc) cbo_t1 left semi join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by q+r/10 desc, p) cbo_t2 on cbo_t1.a=p left semi join cbo_t3 on cbo_t1.a=key where (b + 1 >= 0) and (b > 0 or a >= 0) group by a, c having a > 0 and (a >=1 or c >= 1) and (a + c) >= 0 order by c, a +PREHOOK: type: QUERY +POSTHOOK: query: explain select a, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by a+b desc, c asc) cbo_t1 left semi join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by q+r/10 desc, p) cbo_t2 on cbo_t1.a=p left semi join cbo_t3 on cbo_t1.a=key where (b + 1 >= 0) and (b > 0 or a >= 0) group by a, c having a > 0 and (a >=1 or c >= 1) and (a + c) >= 0 order by c, a +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 + File Output Operator [FS_44] + compressed:false + Statistics:Num rows: 6 Data size: 431 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_43] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 6 Data size: 431 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_42] + key expressions:_col1 (type: bigint), _col0 (type: string) + sort order:++ + Statistics:Num rows: 6 Data size: 431 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: bigint) + Group By Operator [GBY_40] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: string), KEY._col1 (type: bigint) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 6 Data size: 431 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_39] + key expressions:_col0 (type: string), _col1 (type: bigint) + Map-reduce partition columns:_col0 (type: string), _col1 (type: bigint) + sort order:++ + Statistics:Num rows: 13 Data size: 935 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: bigint) + Group By Operator [GBY_38] + aggregations:["count()"] + keys:_col0 (type: string), _col1 (type: bigint) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 13 Data size: 935 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_56] + | condition map:[{"":"Left Semi Join 0 to 1"},{"":"Left Semi Join 0 to 2"}] + | keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 13 Data size: 935 Basic stats: COMPLETE Column stats: NONE + |<-Map 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_35] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_29] + | keys:_col0 (type: string) + | outputColumnNames:["_col0"] + | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_25] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_55] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_24] + | alias:cbo_t3 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_31] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | value expressions:_col1 (type: bigint) + | Select Operator [SEL_9] + | | 
outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_8] + | key expressions:_col3 (type: double), _col2 (type: bigint) + | sort order:-+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | value expressions:_col0 (type: string) + | Filter Operator [FIL_51] + | predicate:(((_col1 + 1) >= 0) and ((_col1 > 0) or (UDFToDouble(_col0) >= 0.0))) (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Select Operator [SEL_7] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Filter Operator [FIL_52] + | predicate:(((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0) and ((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1))) (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Group By Operator [GBY_6] + | | aggregations:["sum(VALUE._col0)"] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | value expressions:_col3 (type: bigint) + | Group By Operator [GBY_4] + | aggregations:["sum(_col1)"] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | Filter Operator [FIL_53] + | predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and (UDFToDouble(key) > 0.0)) and key is not null) (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_33] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator [GBY_27] + keys:_col0 (type: string) + outputColumnNames:["_col0"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator [SEL_22] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_21] + key expressions:_col1 (type: double), _col0 (type: string) + sort order:-+ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator [SEL_20] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator [GBY_19] + | 
aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + |<-Map 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_18] + key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions:_col3 (type: bigint) + Group By Operator [GBY_17] + aggregations:["sum(_col1)"] + keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator [SEL_15] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_54] + predicate:(((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + TableScan [TS_13] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1","_col2"] + TableScan [TS_0] + alias:cbo_t1 +PREHOOK: query: explain select null from cbo_t1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select null from cbo_t1 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Select Operator [SEL_1] + outputColumnNames:["_col0"] + TableScan [TS_0] + alias:cbo_t1 +PREHOOK: query: explain select key from cbo_t1 where c_int = -6 or c_int = +6 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from cbo_t1 where c_int = -6 or c_int = +6 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Select Operator [SEL_2] + outputColumnNames:["_col0"] + Filter Operator [FIL_4] + predicate:((c_int = -6) or (c_int = 6)) (type: boolean) + TableScan [TS_0] + alias:cbo_t1 +PREHOOK: query: explain select count(cbo_t1.dt) from cbo_t1 join cbo_t2 on cbo_t1.dt = cbo_t2.dt where cbo_t1.dt = '2014' +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(cbo_t1.dt) from cbo_t1 join cbo_t2 on cbo_t1.dt = cbo_t2.dt where cbo_t1.dt = '2014' +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_16] + compressed:false + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_14] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_13] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_12] + aggregations:["count(_col0)"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_10] + outputColumnNames:["_col0"] + Statistics:Num rows: 22 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Merge Join Operator [MERGEJOIN_20] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{} + | Statistics:Num rows: 22 Data size: 0 Basic stats: PARTIAL Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | sort order: + | Statistics:Num rows: 20 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + | Select Operator [SEL_2] + | Statistics:Num rows: 20 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + sort order: + Statistics:Num rows: 20 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_5] + Statistics:Num rows: 20 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan [TS_3] + alias:cbo_t2 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select * +from src_cbo b +where not exists + (select distinct a.key + from src_cbo a + where b.value = a.value and a.value > 'val_2' + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from src_cbo b +where not exists + (select distinct a.key + from src_cbo a + where b.value = a.value and a.value > 'val_2' + ) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_16] + compressed:false + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_15] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_18] + predicate:_col2 is null (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_20] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 193 Data size: 51917 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_11] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string) + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:b + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 83 Data size: 7553 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_10] + outputColumnNames:["_col0"] + Statistics:Num rows: 83 Data size: 7553 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_9] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_7] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_19] + predicate:(value > 'val_2') (type: boolean) + Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_3] + alias:b + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select * +from src_cbo b +group by key, value +having not exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from src_cbo b +group by key, value +having not exists + 
(select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_16] + compressed:false + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_15] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_18] + predicate:_col3 is null (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_20] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: string), _col1 (type: string)","0":"_col1 (type: string), _col0 (type: string)"} + | outputColumnNames:["_col0","_col1","_col3"] + | Statistics:Num rows: 8 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + | Reduce Output Operator [RS_12] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_10] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_19] + | predicate:(value > 'val_12') (type: boolean) + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_8] + | alias:b + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_11] + key expressions:_col1 (type: string), _col0 (type: string) + Map-reduce partition columns:_col1 (type: string), _col0 (type: string) + sort order:++ + Statistics:Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_6] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_4] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:b + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: create view cv1 as +select * +from src_cbo b +where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +PREHOOK: type: CREATEVIEW 
+PREHOOK: Input: default@src_cbo +PREHOOK: Output: database:default +PREHOOK: Output: default@cv1 +POSTHOOK: query: create view cv1 as +select * +from src_cbo b +where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@src_cbo +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cv1 +PREHOOK: query: explain select * from cv1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from cv1 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + compressed:false + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_19] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: string), _col1 (type: string)","0":"_col1 (type: string), _col0 (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_9] + | key expressions:_col1 (type: string), _col0 (type: string) + | Map-reduce partition columns:_col1 (type: string), _col0 (type: string) + | sort order:++ + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_17] + | predicate:(value is not null and key is not null) (type: boolean) + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:b + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_11] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_7] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_18] + predicate:((value > 'val_9') and key is not null) (type: boolean) + Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_3] + alias:b + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select * +from (select * + from src_cbo b + where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') + ) a +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from (select * + from src_cbo b + where exists + (select a.key + from src_cbo a + where b.value = a.value and a.key = b.key and a.value > 'val_9') + ) a +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + compressed:false + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_19] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: string), _col1 (type: string)","0":"_col1 (type: string), _col0 (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_9] + | key expressions:_col1 (type: string), _col0 (type: string) + | Map-reduce partition columns:_col1 (type: string), _col0 (type: string) + | sort order:++ + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_17] + | predicate:(value is not null and key is not null) (type: boolean) + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:b + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_11] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_7] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_18] + predicate:((value > 'val_9') and key is not null) (type: boolean) + Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_3] + alias:b + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select * +from src_cbo +where src_cbo.key in (select key from src_cbo s1 where s1.key > '9') +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from src_cbo +where src_cbo.key in (select key from src_cbo s1 where s1.key > '9') +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + compressed:false + Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_19] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_9] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: string) + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_17] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:src_cbo + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_11] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_7] + keys:_col0 (type: string) + outputColumnNames:["_col0"] + Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_5] + outputColumnNames:["_col0"] + Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_18] + predicate:(key > '9') (type: boolean) + Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_3] + alias:src_cbo + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +PREHOOK: type: QUERY +POSTHOOK: query: explain select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_26] + compressed:false + Statistics:Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_25] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_36] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: int)","0":"_col1 (type: int)"} + | outputColumnNames:["_col1","_col2"] + | Statistics:Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_21] + | key expressions:_col1 (type: int) + | Map-reduce partition columns:_col1 (type: int) + | sort order:+ + | Statistics:Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col2 (type: int) + | Merge Join Operator [MERGEJOIN_35] + | | condition map:[{"":"Left Semi Join 0 to 1"}] + | | keys:{"1":"_col0 (type: int), _col1 (type: int)","0":"_col0 (type: int), _col3 (type: int)"} + | | outputColumnNames:["_col1","_col2"] + | | Statistics:Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_16] + | | key expressions:_col0 (type: int), _col3 (type: int) + | | Map-reduce partition columns:_col0 (type: int), _col3 (type: int) + | | sort order:++ + | | Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + | | value expressions:_col1 (type: int), _col2 (type: int) + | | Select Operator [SEL_2] + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + | | Filter Operator [FIL_32] + | | predicate:(((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean) + | | Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + | | TableScan [TS_0] + | | alias:lineitem + | | Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 4 [SIMPLE_EDGE] + | Reduce Output Operator [RS_18] + | key expressions:_col0 (type: int), _col1 (type: int) + | Map-reduce partition columns:_col0 (type: int), _col1 (type: int) + | sort order:++ + | Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_14] + | keys:_col0 (type: int), _col1 (type: int) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_5] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_33] + | predicate:(((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean) + | Statistics:Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_3] + | alias:lineitem + | Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE + 
|<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_23] + key expressions:_col0 (type: int) + Map-reduce partition columns:_col0 (type: int) + sort order:+ + Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_11] + | keys:KEY._col0 (type: int) + | outputColumnNames:["_col0"] + | Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col0 (type: int) + Map-reduce partition columns:_col0 (type: int) + sort order:+ + Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_9] + keys:_col0 (type: int) + outputColumnNames:["_col0"] + Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_7] + outputColumnNames:["_col0"] + Statistics:Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_34] + predicate:l_partkey is not null (type: boolean) + Statistics:Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_6] + alias:lineitem + Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select key, value, count(*) +from src_cbo b +where b.key in (select key from src_cbo where src_cbo.key > '8') +group by key, value +having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value, count(*) +from src_cbo b +where b.key in (select key from src_cbo where src_cbo.key > '8') +group by key, value +having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key ) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_34] + compressed:false + Statistics:Num rows: 41 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_46] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: bigint)","0":"_col2 (type: bigint)"} + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 41 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_29] + | key expressions:_col2 (type: bigint) + | Map-reduce partition columns:_col2 (type: bigint) + | sort order:+ + | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col1 (type: string) + | Filter Operator [FIL_40] + | predicate:_col2 is not null (type: boolean) + | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_16] + | | aggregations:["count(VALUE._col0)"] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_15] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col2 (type: bigint) + | Group By Operator [GBY_14] + | aggregations:["count()"] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE + | Merge Join Operator [MERGEJOIN_45] + | | condition map:[{"":"Left Semi Join 0 to 1"}] + | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_9] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_2] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | | Filter Operator [FIL_41] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | | TableScan [TS_0] + | | alias:b + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 5 [SIMPLE_EDGE] + | Reduce Output Operator [RS_11] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | 
Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_7] + | keys:_col0 (type: string) + | outputColumnNames:["_col0"] + | Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_5] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_42] + | predicate:(key > '8') (type: boolean) + | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_3] + | alias:b + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_31] + key expressions:_col0 (type: bigint) + Map-reduce partition columns:_col0 (type: bigint) + sort order:+ + Statistics:Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_27] + keys:_col0 (type: bigint) + outputColumnNames:["_col0"] + Statistics:Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_25] + outputColumnNames:["_col0"] + Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_43] + predicate:_col1 is not null (type: boolean) + Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_24] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_23] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: bigint) + Group By Operator [GBY_22] + aggregations:["count()"] + keys:_col0 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_20] + outputColumnNames:["_col0"] + Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_44] + predicate:(key > '9') (type: boolean) + Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_18] + alias:b + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select p_mfgr, p_name, avg(p_size) +from part +group by p_mfgr, p_name +having p_name in + (select first_value(p_name) over(partition by p_mfgr order by p_size) from part) +PREHOOK: type: QUERY +POSTHOOK: query: explain select p_mfgr, p_name, avg(p_size) +from part +group by p_mfgr, p_name +having p_name in + (select first_value(p_name) over(partition by p_mfgr order by p_size) from part) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_22] + compressed:false + Statistics:Num rows: 6 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_27] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 6 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_17] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 13 Data size: 2951 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col2 (type: double) + | Group By Operator [GBY_6] + | | aggregations:["avg(VALUE._col0)"] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 13 Data size: 2951 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 13 Data size: 2847 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col2 (type: struct) + | Group By Operator [GBY_4] + | aggregations:["avg(_col2)"] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 13 Data size: 2847 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_25] + | predicate:p_name is not null (type: boolean) + | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:part + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_19] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_15] + keys:_col0 (type: string) + outputColumnNames:["_col0"] + Statistics:Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_12] + outputColumnNames:["_col0"] + Statistics:Num rows: 13 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_26] + predicate:_wcol0 is not null (type: boolean) + Statistics:Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_11] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col5"}] + Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_10] + | 
outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_9] + key expressions:p_mfgr (type: string), p_size (type: int) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_name (type: string) + TableScan [TS_8] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from src_cbo +where src_cbo.key not in + ( select key from src_cbo s1 + where s1.key > '2' + ) order by key +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_27] + compressed:false + Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_26] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_25] + key expressions:_col0 (type: string) + sort order:+ + Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: string) + Select Operator [SEL_24] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_30] + predicate:_col3 is null (type: boolean) + Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_35] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1","_col3"] + | Statistics:Num rows: 605 Data size: 107690 Basic stats: COMPLETE Column stats: NONE + |<-Map 7 [SIMPLE_EDGE] + | Reduce Output Operator [RS_21] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_15] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_33] + | predicate:(key > '2') (type: boolean) + | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_13] + | alias:src_cbo + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_20] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: string) + Merge Join 
Operator [MERGEJOIN_34] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_17] + | sort order: + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col1 (type: string) + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:src_cbo + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_18] + sort order: + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_10] + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator [FIL_31] + predicate:(_col0 = 0) (type: boolean) + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_9] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + sort order: + Statistics:Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_7] + aggregations:["count()"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_5] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_32] + predicate:((key > '2') and key is null) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + TableScan [TS_3] + alias:src_cbo + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size as r from part) a + where r < 10 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
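The src_cbo plan above shows how the CBO plans a non-correlated NOT IN: the outer table is left-outer-joined to the subquery and only rows whose join column comes back NULL are kept, with an extra cross-joined branch that counts NULL keys among the subquery rows (if the subquery can yield a NULL, NOT IN must return no rows). A hand-written equivalent, offered only as a sketch with illustrative aliases:

select b.key, b.value
from src_cbo b
cross join (select count(*) cnt                     -- guard branch (Map 5 / Reducer 6)
            from src_cbo
            where key > '2' and key is null) guard
left outer join (select key                         -- subquery branch (Map 7)
                 from src_cbo
                 where key > '2') sq
  on b.key = sq.key
where guard.cnt = 0                                 -- NOT IN yields nothing if the subquery produces a NULL
  and sq.key is null                                -- keep only unmatched outer rows
order by b.key;

The correlated part/p_name plan that follows has the same overall shape.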
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_25] + compressed:false + Statistics:Num rows: 15 Data size: 3507 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_24] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 15 Data size: 3507 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_28] + predicate:_col4 is null (type: boolean) + Statistics:Num rows: 15 Data size: 3507 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_33] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: string), _col1 (type: string)","0":"_col0 (type: string), _col1 (type: string)"} + | outputColumnNames:["_col0","_col1","_col2","_col4"] + | Statistics:Num rows: 30 Data size: 7014 Basic stats: COMPLETE Column stats: NONE + |<-Map 6 [SIMPLE_EDGE] + | Reduce Output Operator [RS_21] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_15] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_31] + | predicate:(p_size < 10) (type: boolean) + | Statistics:Num rows: 8 Data size: 1784 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_13] + | alias:b + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_20] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 28 Data size: 6377 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: int) + Merge Join Operator [MERGEJOIN_32] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{} + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 28 Data size: 6377 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_17] + | sort order: + | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: int) + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:b + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_18] + sort order: + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_10] + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator [FIL_29] + predicate:(_col0 = 0) (type: boolean) + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_9] + | 
aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + sort order: + Statistics:Num rows: 1 Data size: 231 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_7] + aggregations:["count()"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 231 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_5] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_30] + predicate:((p_size < 10) and (p_name is null or p_mfgr is null)) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + TableScan [TS_3] + alias:b + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) order by p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size from part) a + where p_size < 10 + ) order by p_name +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_39] + compressed:false + Statistics:Num rows: 6 Data size: 825 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_38] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 6 Data size: 825 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_37] + key expressions:_col0 (type: string) + sort order:+ + Statistics:Num rows: 6 Data size: 825 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: int) + Merge Join Operator [MERGEJOIN_49] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 6 Data size: 825 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_31] + | sort order: + | Statistics:Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col1 (type: int) + | Filter Operator [FIL_42] + | predicate:_col2 is null (type: boolean) + | Statistics:Num rows: 6 Data size: 750 Basic stats: COMPLETE Column stats: COMPLETE + | Merge Join Operator [MERGEJOIN_48] + | | condition map:[{"":"Left Outer Join0 to 1"}] + | | keys:{"1":"_col0 (type: double)","0":"UDFToDouble(_col1) (type: double)"} + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_28] + | | key expressions:UDFToDouble(_col1) (type: double) + | | Map-reduce partition columns:UDFToDouble(_col1) (type: double) + | | sort order:+ + | | 
Statistics:Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + | | value expressions:_col0 (type: string), _col1 (type: int) + | | Select Operator [SEL_2] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + | | TableScan [TS_0] + | | alias:part + | | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Reducer 6 [SIMPLE_EDGE] + | Reduce Output Operator [RS_29] + | key expressions:_col0 (type: double) + | Map-reduce partition columns:_col0 (type: double) + | sort order:+ + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_9] + | | aggregations:["avg(VALUE._col0)"] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 5 [SIMPLE_EDGE] + | Reduce Output Operator [RS_8] + | sort order: + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + | value expressions:_col0 (type: struct) + | Group By Operator [GBY_7] + | aggregations:["avg(_col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + | Select Operator [SEL_5] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_44] + | predicate:(p_size < 10) (type: boolean) + | Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_3] + | alias:part + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_32] + sort order: + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Select Operator [SEL_24] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_45] + predicate:(_col0 = 0) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator [GBY_23] + aggregations:["count()"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_18] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_46] + predicate:_col0 is null (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator [GBY_17] + | aggregations:["avg(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_16] + sort order: + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col0 (type: struct) + Group By Operator [GBY_15] + aggregations:["avg(_col0)"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_13] + outputColumnNames:["_col0"] + Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_47] + predicate:(p_size < 10) (type: boolean) + Statistics:Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_11] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select b.p_mfgr, 
min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) + order by b.p_mfgr +PREHOOK: type: QUERY +POSTHOOK: query: explain select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) + order by b.p_mfgr +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 10 <- Map 9 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 5 + File Output Operator [FS_42] + compressed:false + Statistics:Num rows: 2 Data size: 256 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_41] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 2 Data size: 256 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_40] + key expressions:_col0 (type: string) + sort order:+ + Statistics:Num rows: 2 Data size: 256 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: double) + Select Operator [SEL_39] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 2 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_45] + predicate:_col3 is null (type: boolean) + Statistics:Num rows: 2 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_51] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"1":"_col0 (type: string), _col1 (type: double)","0":"_col0 (type: string), _col1 (type: double)"} + | outputColumnNames:["_col0","_col1","_col3"] + | Statistics:Num rows: 5 Data size: 641 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_36] + | key expressions:_col0 (type: string), _col1 (type: double) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: double) + | sort order:++ + | Statistics:Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_30] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_48] + | predicate:((_col2 - _col1) > 600.0) (type: boolean) + | Statistics:Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_28] + | | aggregations:["min(VALUE._col0)","max(VALUE._col1)"] + | | keys:KEY._col0 (type: string) + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 5 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 9 [SIMPLE_EDGE] + | Reduce Output Operator [RS_27] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 5 Data size: 570 Basic stats: 
COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: double), _col2 (type: double) + | Group By Operator [GBY_26] + | aggregations:["min(_col1)","max(_col1)"] + | keys:_col0 (type: string) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 5 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_24] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_23] + | alias:b + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_35] + key expressions:_col0 (type: string), _col1 (type: double) + Map-reduce partition columns:_col0 (type: string), _col1 (type: double) + sort order:++ + Statistics:Num rows: 5 Data size: 583 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_50] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 5 Data size: 583 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_32] + | sort order: + | Statistics:Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col1 (type: double) + | Group By Operator [GBY_6] + | | aggregations:["min(VALUE._col0)"] + | | keys:KEY._col0 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: double) + | Group By Operator [GBY_4] + | aggregations:["min(_col1)"] + | keys:_col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:b + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_33] + sort order: + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_20] + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator [FIL_46] + predicate:(_col0 = 0) (type: boolean) + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_19] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_18] + sort order: + Statistics:Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_17] + aggregations:["count()"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_15] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Filter Operator [FIL_47] + predicate:((_col0 is null or _col2 is null) 
and ((_col1 - _col2) > 600.0)) (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Group By Operator [GBY_13] + | aggregations:["max(VALUE._col0)","min(VALUE._col1)"] + | keys:KEY._col0 (type: string) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 5 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 5 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: double), _col2 (type: double) + Group By Operator [GBY_11] + aggregations:["max(_col1)","min(_col1)"] + keys:_col0 (type: string) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 5 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_9] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_8] + alias:b + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from cbo_t1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from cbo_t1 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
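In the two correlated NOT IN plans above, the correlation predicate (b.p_mfgr = a.p_mfgr, and the HAVING aggregate in the last query) simply becomes an additional equi-join key of the left outer join, which is why MERGEJOIN_33 and MERGEJOIN_51 join on two columns. Roughly, for the p_name query (a sketch only; the NULL-count guard branch is omitted for brevity and the aliases are illustrative):

select b.p_mfgr, b.p_name, b.p_size
from part b
left outer join (select p_mfgr, p_name
                 from part
                 where p_size < 10) sq
  on b.p_mfgr = sq.p_mfgr and b.p_name = sq.p_name
where sq.p_name is null;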
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:false + Statistics:Num rows: 20 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_4] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Statistics:Num rows: 20 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"0","name:":"windowingtablefunction","order by:":"0"}] + Statistics:Num rows: 20 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col2","_col3"] + | Statistics:Num rows: 20 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:0 (type: int) + Map-reduce partition columns:0 (type: int) + sort order:+ + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:c_int (type: int), c_float (type: float) + TableScan [TS_0] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select * from (select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from cbo_t1) cbo_t1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from (select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from cbo_t1) cbo_t1 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
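A note on the windowing plan above: with an empty OVER() every function shares one implicit partition, which surfaces in the PTF Operator as "partition by: 0" and a constant reduce key (key expressions: 0), so all rows are funneled through a single reducer. For comparison, a partitioned window spec (illustrative only) distributes the PTF work by key:

select count(c_int) over (partition by key) from cbo_t1;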
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:false + Statistics:Num rows: 20 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_4] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Statistics:Num rows: 20 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"0","name:":"windowingtablefunction","order by:":"0"}] + Statistics:Num rows: 20 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col2","_col3"] + | Statistics:Num rows: 20 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:0 (type: int) + Map-reduce partition columns:0 (type: int) + sort order:+ + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:c_int (type: int), c_float (type: float) + TableScan [TS_0] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select i, a, h, b, c, d, e, f, g, a as x, a +1 as y from (select max(c_int) over (partition by key order by value range UNBOUNDED PRECEDING) a, min(c_int) over (partition by key order by value range current row) b, count(c_int) over(partition by key order by value range 1 PRECEDING) c, avg(value) over (partition by key order by value range between unbounded preceding and unbounded following) d, sum(value) over (partition by key order by value range between unbounded preceding and current row) e, avg(c_float) over (partition by key order by value range between 1 preceding and unbounded following) f, sum(c_float) over (partition by key order by value range between 1 preceding and current row) g, max(c_float) over (partition by key order by value range between 1 preceding and unbounded following) h, min(c_float) over (partition by key order by value range between 1 preceding and 1 following) i from cbo_t1) cbo_t1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select i, a, h, b, c, d, e, f, g, a as x, a +1 as y from (select max(c_int) over (partition by key order by value range UNBOUNDED PRECEDING) a, min(c_int) over (partition by key order by value range current row) b, count(c_int) over(partition by key order by value range 1 PRECEDING) c, avg(value) over (partition by key order by value range between unbounded preceding and unbounded following) d, sum(value) over (partition by key order by value range between unbounded preceding and current row) e, avg(c_float) over (partition by key order by value range between 1 preceding and unbounded following) f, sum(c_float) over (partition by key order by value range between 1 preceding and current row) g, max(c_float) over (partition by key order by value range between 1 preceding and unbounded following) h, min(c_float) over (partition by key order by value range between 1 preceding and 1 following) i from cbo_t1) cbo_t1 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:false + Statistics:Num rows: 20 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_4] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Statistics:Num rows: 20 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col0","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 20 Data size: 3204 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 20 Data size: 3204 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:key (type: string), value (type: string) + Map-reduce partition columns:key (type: string) + sort order:++ + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:c_int (type: int), c_float (type: float) + TableScan [TS_0] + alias:cbo_t1 + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select *, rank() over(partition by key order by value) as rr from src1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select *, rank() over(partition by key order by value) as rr from src1 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:false + Statistics:Num rows: 25 Data size: 4475 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_4] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 25 Data size: 4475 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col0","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:key (type: string), value (type: string) + Map-reduce partition columns:key (type: string) + sort order:++ + Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:src1 + Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_20] + compressed:false + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_18] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_17] + sort order: + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint), _col1 (type: bigint) + Group By Operator [GBY_16] + aggregations:["sum(_col0)","sum(_col1)"] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_14] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_13] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: bigint) + Group By Operator [GBY_11] + aggregations:["count(1)"] + keys:_col0 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_9] + outputColumnNames:["_col0"] + Statistics:Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_25] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + | Statistics:Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_1] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_23] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_3] + outputColumnNames:["_col0"] + Statistics:Num rows: 25 Data size: 2150 
Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_24] + predicate:key is not null (type: boolean) + Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_2] + alias:x + Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_20] + compressed:false + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_18] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_17] + sort order: + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint), _col1 (type: bigint) + Group By Operator [GBY_16] + aggregations:["sum(_col0)","sum(_col1)"] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_14] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_13] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: bigint) + Group By Operator [GBY_11] + aggregations:["count(1)"] + keys:_col0 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_9] + outputColumnNames:["_col0"] + Statistics:Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_25] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + | Statistics:Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_1] + | 
outputColumnNames:["_col0"] + | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_23] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_3] + outputColumnNames:["_col0"] + Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_24] + predicate:key is not null (type: boolean) + Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_2] + alias:x + Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Map 1 <- Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_20] + compressed:false + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_18] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_17] + sort order: + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint), _col1 (type: bigint) + Group By Operator [GBY_16] + aggregations:["sum(_col0)","sum(_col1)"] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_14] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_13] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: bigint) + Group By Operator [GBY_11] + aggregations:["count(1)"] + keys:_col0 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 14 Data size: 1316 Basic stats: COMPLETE Column 
stats: COMPLETE + Select Operator [SEL_9] + outputColumnNames:["_col0"] + Statistics:Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator [MAPJOIN_25] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 1":"_col0 (type: string)","Map 4":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + | Statistics:Num rows: 60 Data size: 5160 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_3] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_24] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_2] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + |<-Select Operator [SEL_1] + outputColumnNames:["_col0"] + Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_23] + predicate:key is not null (type: boolean) + Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:y + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x LEFT SEMI JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: explain +select SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (select x.key AS key, count(1) AS cnt + FROM src1 x LEFT SEMI JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +POSTHOOK: type: QUERY +Plan optimized by CBO. 
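The third plan for the SUM(HASH(...)) query above differs from the first two only in the join strategy: the shuffle Merge Join becomes a Map Join with the small src1 side (Map 4, alias x) broadcast into Map 1 over a BROADCAST_EDGE, so it is hashed in memory instead of shuffled. Presumably the test re-runs the explain after enabling map-join conversion; the setting below is an assumption about which knob is involved, not something stated in this output:

set hive.auto.convert.join=true;  -- assumed toggle; enables automatic Merge Join -> Map Join conversion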
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_22] + compressed:false + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_20] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_19] + sort order: + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint), _col1 (type: bigint) + Group By Operator [GBY_18] + aggregations:["sum(_col0)","sum(_col1)"] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_16] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_15] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_14] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: bigint) + Group By Operator [GBY_13] + aggregations:["count(1)"] + keys:_col0 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_27] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0"] + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_1] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_25] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_9] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_5] + keys:_col0 (type: string) + outputColumnNames:["_col0"] + Statistics:Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_3] + outputColumnNames:["_col0"] + Statistics:Num 
rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_26] + predicate:key is not null (type: boolean) + Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_2] + alias:y + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain create table abcd (a int, b int, c int, d int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table abcd (a int, b int, c int, d int) +POSTHOOK: type: CREATETABLE +Stage-0 + Create Table Operator: + columns:["a int","b int","c int","d int"] + input format:org.apache.hadoop.mapred.TextInputFormat + name:default.abcd + output format:org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat +PREHOOK: query: create table abcd (a int, b int, c int, d int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@abcd +POSTHOOK: query: create table abcd (a int, b int, c int, d int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@abcd +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@abcd +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@abcd +PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_7] + compressed:false + Statistics:Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_5] + | aggregations:["count(DISTINCT KEY._col1:0._col0)","count(DISTINCT KEY._col1:1._col0)","sum(VALUE._col2)"] + | keys:KEY._col0 (type: int) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col0 (type: int), _col1 (type: int), _col2 (type: int) + Map-reduce partition columns:_col0 (type: int) + sort order:+++ + Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions:_col5 (type: bigint) + Group By Operator [GBY_3] + aggregations:["count(DISTINCT _col1)","count(DISTINCT _col2)","sum(_col3)"] + keys:_col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:abcd + Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain select 
a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:false + Statistics:Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_4] + | aggregations:["count(DISTINCT KEY._col1:0._col0)","count(DISTINCT KEY._col1:1._col0)","sum(VALUE._col0)"] + | keys:KEY._col0 (type: int) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:_col0 (type: int), _col1 (type: int), _col2 (type: int) + Map-reduce partition columns:_col0 (type: int) + sort order:+++ + Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions:_col3 (type: int) + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:abcd + Statistics:Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain create table src_rc_merge_test(key int, value string) stored as rcfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table src_rc_merge_test(key int, value string) stored as rcfile +POSTHOOK: type: CREATETABLE +Stage-0 + Create Table Operator: + columns:["key int","value string"] + input format:org.apache.hadoop.hive.ql.io.RCFileInputFormat + name:default.src_rc_merge_test + output format:org.apache.hadoop.hive.ql.io.RCFileOutputFormat +PREHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_rc_merge_test +POSTHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_rc_merge_test +PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' into table src_rc_merge_test +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@src_rc_merge_test +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' into table src_rc_merge_test +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@src_rc_merge_test +PREHOOK: query: explain create table tgt_rc_merge_test(key int, value string) stored as rcfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table tgt_rc_merge_test(key int, value string) stored as rcfile +POSTHOOK: type: CREATETABLE +Stage-0 + Create Table Operator: + columns:["key int","value string"] + input format:org.apache.hadoop.hive.ql.io.RCFileInputFormat + name:default.tgt_rc_merge_test + output format:org.apache.hadoop.hive.ql.io.RCFileOutputFormat +PREHOOK: query: create table tgt_rc_merge_test(key int, value string) stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: query: create table tgt_rc_merge_test(key int, value string) stored as rcfile +POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tgt_rc_merge_test +PREHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@src_rc_merge_test +PREHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_rc_merge_test +POSTHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: show table extended like `tgt_rc_merge_test` +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `tgt_rc_merge_test` +POSTHOOK: type: SHOW_TABLESTATUS +tableName:tgt_rc_merge_test +#### A masked pattern was here #### +inputformat:org.apache.hadoop.hive.ql.io.RCFileInputFormat +outputformat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat +columns:struct columns { i32 key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:171 +maxFileSize:171 +minFileSize:171 +#### A masked pattern was here #### + +PREHOOK: query: explain select count(1) from tgt_rc_merge_test +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1) from tgt_rc_merge_test +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_4] + | aggregations:["count(1)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + sort order: + Statistics:Num rows: 5 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_1] + Statistics:Num rows: 5 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:tgt_rc_merge_test + Statistics:Num rows: 5 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_4] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + sort order: + Statistics:Num rows: 5 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: int), _col1 (type: int) + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 5 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:tgt_rc_merge_test + Statistics:Num rows: 5 Data size: 32 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: alter table tgt_rc_merge_test concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: query: alter table tgt_rc_merge_test concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: default@tgt_rc_merge_test +PREHOOK: query: show table extended like `tgt_rc_merge_test` +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `tgt_rc_merge_test` +POSTHOOK: type: SHOW_TABLESTATUS +tableName:tgt_rc_merge_test +#### A masked pattern was here #### +inputformat:org.apache.hadoop.hive.ql.io.RCFileInputFormat +outputformat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat +columns:struct columns { i32 key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:171 +maxFileSize:171 +minFileSize:171 +#### A masked pattern was here #### + +PREHOOK: query: explain select count(1) from tgt_rc_merge_test +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1) from tgt_rc_merge_test +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_4] + | aggregations:["count(1)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + sort order: + Statistics:Num rows: 0 Data size: 171 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_1] + Statistics:Num rows: 0 Data size: 171 Basic stats: PARTIAL Column stats: COMPLETE + TableScan [TS_0] + alias:tgt_rc_merge_test + Statistics:Num rows: 0 Data size: 171 Basic stats: PARTIAL Column stats: COMPLETE +PREHOOK: query: explain select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_4] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + sort order: + Statistics:Num rows: 1 Data size: 171 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: int), _col1 (type: int) + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1 Data size: 171 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:tgt_rc_merge_test + Statistics:Num rows: 1 Data size: 171 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: drop table src_rc_merge_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_rc_merge_test +PREHOOK: Output: default@src_rc_merge_test +POSTHOOK: query: drop table src_rc_merge_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_rc_merge_test +POSTHOOK: Output: default@src_rc_merge_test +PREHOOK: query: drop table tgt_rc_merge_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: query: drop table tgt_rc_merge_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: default@tgt_rc_merge_test +PREHOOK: query: explain select src.key from src cross join src src2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select src.key from src cross join src src2 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_9] + compressed:true + Statistics:Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_11] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{} + | outputColumnNames:["_col0"] + | Statistics:Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_5] + | sort order: + | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string) + | Select Operator [SEL_1] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:src + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_6] + sort order: + Statistics:Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_3] + Statistics:Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan [TS_2] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain create table nzhang_Tmp(a int, b string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table nzhang_Tmp(a int, b string) +POSTHOOK: type: CREATETABLE +Stage-0 + Create Table Operator: + columns:["a int","b string"] + input format:org.apache.hadoop.mapred.TextInputFormat + name:default.nzhang_Tmp + output format:org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat +PREHOOK: query: create table nzhang_Tmp(a int, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_Tmp +POSTHOOK: query: create table nzhang_Tmp(a int, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_Tmp +PREHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +Plan not optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-3 + Stats-Aggr Operator + Stage-4 + Create Table Operator: + columns:["k string","value string"] + input format:org.apache.hadoop.mapred.TextInputFormat + name:default.nzhang_CTAS1 + output format:org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 + File Output Operator [FS_8] + compressed:true + Statistics:Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.nzhang_CTAS1","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Limit [LIM_7] + Number of rows:10 + Statistics:Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_6] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col0 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Limit [LIM_4] + Number of rows:10 + Statistics:Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_2] + key expressions:_col0 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Stage-0 + Move Operator + Please refer to the previous Stage-1 +PREHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_CTAS1 +POSTHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_CTAS1 +PREHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +Plan not optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-3 + Stats-Aggr Operator + Stage-4 + Create Table Operator: + columns:["half_key double","conb string"] + input format:org.apache.hadoop.hive.ql.io.RCFileInputFormat + name:default.nzhang_ctas3 + output format:org.apache.hadoop.hive.ql.io.RCFileOutputFormat + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 + File Output Operator [FS_8] + compressed:true + Statistics:Num rows: 10 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe","name:":"default.nzhang_ctas3","input format:":"org.apache.hadoop.hive.ql.io.RCFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.RCFileOutputFormat"} + Limit [LIM_7] + Number of rows:10 + Statistics:Num rows: 10 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_6] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 10 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col0 (type: double), _col1 (type: string) + sort order:++ + Statistics:Num rows: 10 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + Limit [LIM_4] + Number of rows:10 + Statistics:Num rows: 10 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 500 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_2] + key expressions:_col0 (type: double), _col1 (type: string) + sort order:++ + Statistics:Num rows: 500 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 500 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Stage-0 + Move Operator + Please refer to the previous Stage-1 +PREHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@nzhang_ctas3 +POSTHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nzhang_ctas3 +PREHOOK: query: explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +POSTHOOK: type: CREATETABLE +PREHOOK: query: create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +POSTHOOK: type: CREATETABLE +PREHOOK: query: explain create temporary table acid_dtt(a int, b varchar(128)) 
clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create temporary table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +Stage-0 + Create Table Operator: + # buckets:2 + bucket columns:["a"] + columns:["a int","b varchar(128)"] + input format:org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + name:default.acid_dtt + output format:org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +PREHOOK: query: create temporary table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_dtt +POSTHOOK: query: create temporary table acid_dtt(a int, b varchar(128)) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_dtt +PREHOOK: query: explain +select src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (select * FROM src WHERE src.key < 10) src1 + JOIN + (select * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (select * FROM src WHERE src.key < 10) src1 + JOIN + (select * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_12] + compressed:true + Statistics:Num rows: 13778 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_11] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 13778 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order:++++ + Statistics:Num rows: 13778 Data size: 4904968 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_15] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{} + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 13778 Data size: 4904968 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_6] + | sort order: + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col1 (type: string) + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_13] + | predicate:(key < 10) (type: boolean) + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:src + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + Reduce 
Output Operator [RS_7] + sort order: + Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string), _col1 (type: string) + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_14] + predicate:(key < 10) (type: boolean) + Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_3] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: CREATE TABLE myinput1(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@myinput1 +POSTHOOK: query: CREATE TABLE myinput1(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@myinput1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in8.txt' INTO TABLE myinput1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@myinput1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in8.txt' INTO TABLE myinput1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@myinput1 +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_7] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"value (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6"] + | Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: int) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:value (type: int) + Map-reduce partition columns:value (type: int) + sort order:+ + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions:key (type: int) + TableScan [TS_1] + alias:b + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key 
+POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_11] + compressed:true + Statistics:Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_10] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_21] + | condition map:[{"":"Inner Join 0 to 1"},{"":"Inner Join 0 to 2"}] + | keys:{"2":"key (type: int)","1":"value (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6","_col10","_col11"] + | Statistics:Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_4] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: int) + | Filter Operator [FIL_18] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_6] + | key expressions:value (type: int) + | Map-reduce partition columns:value (type: int) + | sort order:+ + | Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + | value expressions:key (type: int) + | Filter Operator [FIL_19] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_1] + | alias:b + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + key expressions:key (type: int) + Map-reduce partition columns:key (type: int) + sort order:+ + Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions:value (type: int) + Filter Operator [FIL_20] + predicate:key is not null (type: boolean) + Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:c + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_8] + compressed:true + Statistics:Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_9] + | condition map:[{"":"Inner Join 0 to 1"},{"":"Inner Join 0 to 2"}] + | keys:{"2":"key (type: int)","1":"value (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6","_col10","_col11"] + | Statistics:Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_3] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: int) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_4] + | key expressions:value (type: int) + | Map-reduce partition columns:value (type: int) + | sort order:+ + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + | value expressions:key (type: int) + | TableScan [TS_1] + | alias:b + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:key (type: int) + Map-reduce partition columns:key (type: int) + sort order:+ + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions:value (type: int) + TableScan [TS_2] + alias:c + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_11] + compressed:true + Statistics:Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_10] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_15] + | condition map:[{"":"Inner Join 0 to 1"},{"":"Inner Join 0 to 2"}] + | keys:{"2":"key (type: int), value (type: int)","1":"value (type: int), key (type: int)","0":"key (type: int), value (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6","_col10","_col11"] + | Statistics:Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_4] + | key expressions:key (type: int), value (type: int) + | Map-reduce partition columns:key (type: int), value (type: int) + | sort order:++ + | Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_12] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_6] + | key expressions:value (type: int), key (type: int) + | Map-reduce partition columns:value (type: int), key (type: int) + | sort order:++ + | Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_13] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_1] + | alias:b + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + key expressions:key (type: int), value (type: int) + Map-reduce partition columns:key (type: int), value (type: int) + sort order:++ + Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_14] + predicate:value is not null (type: boolean) + Statistics:Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:c + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_8] + compressed:true + Statistics:Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_9] + | condition map:[{"":"Inner Join 0 to 1"},{"":"Inner Join 0 to 2"}] + | keys:{"2":"key (type: int), value (type: int)","1":"value (type: int), key (type: int)","0":"key (type: int), value (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6","_col10","_col11"] + | Statistics:Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_3] + | key expressions:key (type: int), value (type: int) + | Map-reduce partition columns:key (type: int), value (type: int) + | sort order:++ + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_4] + | key expressions:value (type: int), key (type: int) + | Map-reduce partition columns:value (type: int), key (type: int) + | sort order:++ + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_1] + | alias:b + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:key (type: int), value (type: int) + Map-reduce partition columns:key (type: int), value (type: int) + sort order:++ + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:c + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_7] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"1":"value (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6"] + | Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: int) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:value (type: int) + Map-reduce partition columns:value (type: int) + sort order:+ + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions:key (type: int) + TableScan [TS_1] + alias:b + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_7] + | condition map:[{"":"Right Outer Join0 to 1"}] + | keys:{"1":"value (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6"] + | Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: int) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:value (type: int) + Map-reduce partition columns:value (type: int) + sort order:+ + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions:key (type: int) + TableScan [TS_1] + alias:b + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_7] + | condition map:[{"":"Outer Join 0 to 1"}] + | keys:{"1":"value (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6"] + | Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: int) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:value (type: int) + Map-reduce partition columns:value (type: int) + sort order:+ + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions:key (type: int) + TableScan [TS_1] + alias:b + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_7] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"value (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6"] + | Statistics:Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: int) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:value (type: int) + Map-reduce partition columns:value (type: int) + sort order:+ + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions:key (type: int) + TableScan [TS_1] + alias:b + Statistics:Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: CREATE TABLE smb_input(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_input +POSTHOOK: query: CREATE TABLE smb_input(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_input +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' into table smb_input +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' into table smb_input +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in5.txt' into table smb_input +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in5.txt' into table smb_input +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input +PREHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_input1 +POSTHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_input2 +POSTHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE 
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: from smb_input +insert overwrite table smb_input1 select * +insert overwrite table smb_input2 select * +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input +PREHOOK: Output: default@smb_input1 +PREHOOK: Output: default@smb_input2 +POSTHOOK: query: from smb_input +insert overwrite table smb_input1 select * +insert overwrite table smb_input2 select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input +POSTHOOK: Output: default@smb_input1 +POSTHOOK: Output: default@smb_input2 +POSTHOOK: Lineage: smb_input1.key SIMPLE [(smb_input)smb_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_input1.value SIMPLE [(smb_input)smb_input.FieldSchema(name:value, type:int, comment:null), ] +POSTHOOK: Lineage: smb_input2.key SIMPLE [(smb_input)smb_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_input2.value SIMPLE [(smb_input)smb_input.FieldSchema(name:value, type:int, comment:null), ] +PREHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_7] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"key (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6"] + | Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: int) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:key (type: int) + Map-reduce partition columns:key (type: int) + sort order:+ + Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + value expressions:value (type: int) + TableScan [TS_1] + alias:b + Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key AND a.value <=> b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key AND a.value <=> b.value +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_7] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"key (type: int), value (type: int)","0":"key (type: int), value (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6"] + | Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:key (type: int), value (type: int) + | Map-reduce partition columns:key (type: int), value (type: int) + | sort order:++ + | Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:key (type: int), value (type: int) + Map-reduce partition columns:key (type: int), value (type: int) + sort order:++ + Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_1] + alias:b + Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key <=> b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key <=> b.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_7] + | condition map:[{"":"Right Outer Join0 to 1"}] + | keys:{"1":"key (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6"] + | Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: int) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:key (type: int) + Map-reduce partition columns:key (type: int) + sort order:+ + Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + value expressions:value (type: int) + TableScan [TS_1] + alias:b + Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_7] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"key (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6"] + | Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: int) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:key (type: int) + Map-reduce partition columns:key (type: int) + sort order:+ + Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + value expressions:value (type: int) + TableScan [TS_1] + alias:b + Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key <=> b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key <=> b.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_6] + compressed:true + Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_7] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"1":"key (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1","_col5","_col6"] + | Statistics:Num rows: 28 Data size: 209 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: int) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:key (type: int) + Map-reduce partition columns:key (type: int) + sort order:+ + Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE + value expressions:value (type: int) + TableScan [TS_1] + alias:b + Statistics:Num rows: 26 Data size: 190 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: drop table sales +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table sales +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table things +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table things +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE sales (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sales +POSTHOOK: query: CREATE TABLE sales (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sales +PREHOOK: query: CREATE TABLE things (id INT, name STRING) partitioned by (ds string) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@things +POSTHOOK: query: CREATE TABLE things (id INT, name STRING) partitioned by (ds string) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@things +PREHOOK: query: load data local inpath '../../data/files/sales.txt' INTO TABLE sales +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@sales +POSTHOOK: query: load data local inpath '../../data/files/sales.txt' INTO TABLE sales +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@sales +PREHOOK: query: load data local inpath '../../data/files/things.txt' INTO TABLE things partition(ds='2011-10-23') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@things +POSTHOOK: query: load data local inpath '../../data/files/things.txt' INTO TABLE things partition(ds='2011-10-23') +POSTHOOK: type: LOAD +#### A masked 
pattern was here #### +POSTHOOK: Output: default@things +POSTHOOK: Output: default@things@ds=2011-10-23 +PREHOOK: query: load data local inpath '../../data/files/things2.txt' INTO TABLE things partition(ds='2011-10-24') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@things +POSTHOOK: query: load data local inpath '../../data/files/things2.txt' INTO TABLE things partition(ds='2011-10-24') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@things +POSTHOOK: Output: default@things@ds=2011-10-24 +PREHOOK: query: explain select name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id) +PREHOOK: type: QUERY +POSTHOOK: query: explain select name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id) +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_12] + compressed:true + Statistics:Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_17] + | condition map:[{"":"Left Semi Join 0 to 1"}] + | keys:{"1":"_col0 (type: int)","0":"_col1 (type: int)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col1 (type: int) + | Map-reduce partition columns:_col1 (type: int) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | value expressions:_col0 (type: string) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | Filter Operator [FIL_15] + | predicate:id is not null (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | TableScan [TS_0] + | alias:sales + | Statistics:Num rows: 0 Data size: 13 Basic stats: PARTIAL Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_9] + key expressions:_col0 (type: int) + Map-reduce partition columns:_col0 (type: int) + sort order:+ + Statistics:Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_5] + keys:_col0 (type: int) + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_3] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_16] + predicate:id is not null (type: boolean) + Statistics:Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:things + Statistics:Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: drop table sales +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@sales +PREHOOK: Output: default@sales +POSTHOOK: query: drop table sales +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@sales +POSTHOOK: Output: default@sales +PREHOOK: query: drop table things +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@things +PREHOOK: Output: default@things +POSTHOOK: query: drop table things +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: 
default@things +POSTHOOK: Output: default@things +PREHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +PREHOOK: type: QUERY +POSTHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +Map 3 <- Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_20] + compressed:true + Statistics:Num rows: 555 Data size: 48285 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_19] + outputColumnNames:["_col0"] + Statistics:Num rows: 555 Data size: 48285 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_30] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + | Statistics:Num rows: 555 Data size: 48285 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_15] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_26] + | predicate:(value > 'val_450') (type: boolean) + | Statistics:Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:src + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_17] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) + sort order:+ + Statistics:Num rows: 241 Data size: 42898 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string) + Map Join Operator [MAPJOIN_29] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 3":"_col0 (type: string)","Map 4":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 241 Data size: 42898 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_11] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_7] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_28] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_6] + | alias:src1 + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + |<-Select Operator [SEL_5] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 666 Data size: 118548 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator 
[FIL_27] + predicate:(((value > 'val_450') and key is not null) and value is not null) (type: boolean) + Statistics:Num rows: 666 Data size: 118548 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_3] + alias:srcpart + Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +PREHOOK: type: QUERY +POSTHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +Map 3 <- Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_20] + compressed:true + Statistics:Num rows: 555 Data size: 48285 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_19] + outputColumnNames:["_col0"] + Statistics:Num rows: 555 Data size: 48285 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_30] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + | Statistics:Num rows: 555 Data size: 48285 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_15] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_26] + | predicate:(value > 'val_450') (type: boolean) + | Statistics:Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:src + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_17] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) + sort order:+ + Statistics:Num rows: 241 Data size: 42898 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string) + Map Join Operator [MAPJOIN_29] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 3":"_col0 (type: string)","Map 4":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 241 Data size: 42898 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_11] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_7] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_28] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_6] + | 
alias:src1 + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + |<-Select Operator [SEL_5] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 666 Data size: 118548 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_27] + predicate:(((value > 'val_450') and key is not null) and value is not null) (type: boolean) + Statistics:Num rows: 666 Data size: 118548 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_3] + alias:srcpart + Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +PREHOOK: type: QUERY +POSTHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +Map 3 <- Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_20] + compressed:true + Statistics:Num rows: 555 Data size: 48285 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_19] + outputColumnNames:["_col0"] + Statistics:Num rows: 555 Data size: 48285 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_30] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + | Statistics:Num rows: 555 Data size: 48285 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_15] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_26] + | predicate:(value > 'val_450') (type: boolean) + | Statistics:Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:src + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_17] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) + sort order:+ + Statistics:Num rows: 241 Data size: 42898 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: string) + Map Join Operator [MAPJOIN_29] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 3":"_col0 (type: string)","Map 4":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 241 Data size: 42898 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_11] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_7] + | outputColumnNames:["_col0"] + | 
Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_28] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_6] + | alias:src1 + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + |<-Select Operator [SEL_5] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 666 Data size: 118548 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_27] + predicate:(((value > 'val_450') and key is not null) and value is not null) (type: boolean) + Statistics:Num rows: 666 Data size: 118548 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_3] + alias:srcpart + Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_9] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_6] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_5] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col5 (type: int), _col7 (type: double) + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 
[SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_size (type: int), p_retailprice (type: double) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j +distribute by j.p_mfgr +sort by j.p_name) +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Map 1 <- Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_16] + compressed:true + Statistics:Num rows: 29 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_14] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 29 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_13] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 29 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_12] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 29 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_11] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 29 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col5 (type: int) + PTF Operator [PTF_10] + Function definitions:[{"Input definition":{"type:":"SUBQUERY"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] + Statistics:Num rows: 29 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_9] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 29 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col5 (type: int) + Map Join Operator [MAPJOIN_21] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 1":"p_partkey (type: int)","Map 4":"p_partkey (type: int)"} + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 
[BROADCAST_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:p_partkey (type: int) + | Map-reduce partition columns:p_partkey (type: int) + | sort order:+ + | Statistics:Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_20] + | predicate:p_partkey is not null (type: boolean) + | Statistics:Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_1] + | alias:p2 + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + |<-Filter Operator [FIL_19] + predicate:p_partkey is not null (type: boolean) + Statistics:Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:p1 + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part + partition by p_mfgr + order by p_name + ) abc +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_9] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_6] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_5] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col5 (type: int), _col7 (type: double) + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output 
Operator [RS_1] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_size (type: int), p_retailprice (type: double) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_9] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_6] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_5] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col5 (type: int) + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] + Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_size (type: int) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by 
p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz +from noop(on part + partition by p_mfgr + order by p_name + ) +group by p_mfgr, p_name, p_size +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 5 + File Output Operator [FS_14] + compressed:true + Statistics:Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_12] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Statistics:Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_11] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col0","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_10] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_9] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:++ + Statistics:Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col2 (type: int) + Group By Operator [GBY_8] + | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: int) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order:+++ + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator [GBY_6] + | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col2 (type: string), _col1 (type: string), _col5 (type: int) + Map-reduce partition columns:rand() (type: double) + sort order:+++ + Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] + Statistics:Num rows: 26 Data size: 5798 Basic 
stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_size (type: int) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: explain +select abc.* +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_11] + compressed:true + Statistics:Num rows: 14 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Map Join Operator [MAPJOIN_16] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Reducer 2":"_col0 (type: int)","Map 3":"p_partkey (type: int)"} + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + | Statistics:Num rows: 14 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Map 3 [BROADCAST_EDGE] + | Reduce Output Operator [RS_8] + | key expressions:p_partkey (type: int) + | Map-reduce partition columns:p_partkey (type: int) + | sort order:+ + | Statistics:Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_15] + | predicate:p_partkey is not null (type: boolean) + | Statistics:Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_1] + | alias:p1 + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + |<-Filter Operator [FIL_14] + predicate:_col0 is not null (type: boolean) + Statistics:Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_4] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] + Statistics:Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + | Statistics:Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_2] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + TableScan [TS_0] + alias:part + Statistics:Num 
rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name, p_size desc) as r +from noopwithmap(on part +partition by p_mfgr +order by p_name, p_size desc) +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_10] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_8] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_7] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1, _col5(DESC)"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_6] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col2 (type: string), _col1 (type: string), _col5 (type: int) + Map-reduce partition columns:_col2 (type: string) + sort order:++- + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_4] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noopwithmap","order by:":"_col1, _col5(DESC)"}}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_3] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_2] + key expressions:p_mfgr (type: string), p_name (type: string), p_size (type: int) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++- + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_1] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"p_mfgr","name:":"noopwithmap","order by:":"p_name, p_size(DESC)"}}] + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition 
by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noopwithmap(on part + partition by p_mfgr + order by p_name) +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_10] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_8] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_7] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_6] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col5 (type: int), _col7 (type: double) + PTF Operator [PTF_4] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noopwithmap","order by:":"_col1"}}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_3] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_2] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_size (type: int), p_retailprice (type: double) + PTF Operator [PTF_1] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"p_mfgr","name:":"noopwithmap","order by:":"p_name"}}] + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr 
order by p_name rows between unbounded preceding and current row) as s1 +from noop(on part +partition by p_mfgr +order by p_name) +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_9] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_6] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_5] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col5 (type: int), _col7 (type: double) + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_size (type: int), p_retailprice (type: double) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr DESC, p_name +))) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on noopwithmap(on noop(on part +partition by p_mfgr +order by p_mfgr DESC, p_name +))) +POSTHOOK: type: QUERY +Plan not optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_13] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_11] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_10] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_9] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col5 (type: int), _col7 (type: double) + PTF Operator [PTF_7] + Function definitions:[{"Input definition":{"type:":"PTFCOMPONENT"}},{"Partition table definition":{"partition by:":"_col2","name:":"noopwithmap","order by:":"_col2(DESC), _col1"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col2(DESC), _col1"}}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_6] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:-+ + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col5 (type: int), _col7 (type: double) + PTF Operator [PTF_4] + Function definitions:[{"Input definition":{"type:":"PTFCOMPONENT"}},{"Partition table definition":{"partition by:":"_col2","name:":"noopwithmap","order by:":"_col2(DESC), _col1"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col2(DESC), _col1"}}] + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col2(DESC), _col1"}}] + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:-+ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_size (type: int), 
p_retailprice (type: double) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, +sub1.cd, sub1.s1 +from (select p_mfgr, p_name, +count(p_size) over (partition by p_mfgr order by p_name) as cd, +p_retailprice, +sum(p_retailprice) over w1 as s1 +from noop(on part +partition by p_mfgr +order by p_name) +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +) sub1 +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_10] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2","_col3"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_6] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_5] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col5 (type: int), _col7 (type: double) + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_size (type: int), p_retailprice (type: double) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by 
abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: explain +select abc.p_mfgr, abc.p_name, +rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, +count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, +abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over (distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_15] + compressed:true + Statistics:Num rows: 14 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_13] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Statistics:Num rows: 14 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_12] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 14 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_11] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 14 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 14 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col5 (type: int), _col7 (type: double) + Map Join Operator [MAPJOIN_20] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Reducer 2":"_col0 (type: int)","Map 4":"p_partkey (type: int)"} + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 14 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_8] + | key expressions:p_partkey (type: int) + | Map-reduce partition columns:p_partkey (type: int) + | sort order:+ + | Statistics:Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_19] + | predicate:p_partkey is not null (type: boolean) + | Statistics:Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_1] + | alias:p1 + | Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + |<-Filter Operator [FIL_18] + predicate:_col0 is not null (type: boolean) + Statistics:Num 
rows: 13 Data size: 3055 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_4] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] + Statistics:Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_2] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_partkey (type: int), p_size (type: int), p_retailprice (type: double) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +sum(p_retailprice) as s +from part +group by p_mfgr, p_brand +PREHOOK: type: CREATEVIEW +POSTHOOK: query: explain create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +sum(p_retailprice) as s +from part +group by p_mfgr, p_brand +POSTHOOK: type: CREATEVIEW +Plan not optimized by CBO. + +Stage-0 + Create View Operator: + name:default.mfgr_price_view + original text:select p_mfgr, p_brand, +sum(p_retailprice) as s +from part +group by p_mfgr, p_brand +PREHOOK: query: CREATE TABLE part_4( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +s DOUBLE) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_4 +POSTHOOK: query: CREATE TABLE part_4( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +s DOUBLE) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_4 +PREHOOK: query: CREATE TABLE part_5( +p_mfgr STRING, +p_name STRING, +p_size INT, +s2 INT, +r INT, +dr INT, +cud DOUBLE, +fv1 INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_5 +POSTHOOK: query: CREATE TABLE part_5( +p_mfgr STRING, +p_name STRING, +p_size INT, +s2 INT, +r INT, +dr INT, +cud DOUBLE, +fv1 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_5 +PREHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain +from noop(on part +partition by p_mfgr +order by p_name) +INSERT OVERWRITE TABLE part_4 select p_mfgr, p_name, p_size, +rank() over (distribute by 
p_mfgr sort by p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_name) as dr, +sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s +INSERT OVERWRITE TABLE part_5 select p_mfgr,p_name, p_size, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as r, +dense_rank() over (distribute by p_mfgr sort by p_mfgr, p_name) as dr, +cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, +first_value(p_size, true) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 2 (SIMPLE_EDGE) + +Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.part_5","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 3 + File Output Operator [FS_9] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.part_4","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_6] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_5] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col5 (type: int), _col7 (type: double) + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}] + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5","_col7"] + | Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_size (type: int), p_retailprice (type: double) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + File Output 
Operator [FS_20] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.part_5","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_17] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_16] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col3","name:":"windowingtablefunction","order by:":"_col3, _col2"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_15] + | outputColumnNames:["_col0","_col2","_col3","_col6"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_14] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_wcol0 (type: bigint), _col5 (type: int) + Select Operator [SEL_13] + outputColumnNames:["_col1","_col2","_col5","_wcol0"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_12] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col5"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_11] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col2 (type: string), _col5 (type: int) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: string) + Please refer to the previous PTF Operator [PTF_3] +Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.part_4","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Please refer to the previous Stage-3 +PREHOOK: query: explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by p_mfgr,p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr,p_name) as r, +dense_rank() over (partition by p_mfgr,p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 +from noop(on + noopwithmap(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr,p_name + order by 
p_mfgr,p_name) +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_13] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_11] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_10] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2, _col1","name:":"windowingtablefunction","order by:":"_col2, _col1"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_9] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col5 (type: int) + PTF Operator [PTF_7] + Function definitions:[{"Input definition":{"type:":"PTFCOMPONENT"}},{"Partition table definition":{"partition by:":"_col2, _col1","name:":"noopwithmap","order by:":"_col2, _col1"}},{"Partition table definition":{"partition by:":"_col2, _col1","name:":"noop","order by:":"_col2, _col1"}}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_6] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col5 (type: int) + PTF Operator [PTF_4] + Function definitions:[{"Input definition":{"type:":"PTFCOMPONENT"}},{"Partition table definition":{"partition by:":"_col2, _col1","name:":"noopwithmap","order by:":"_col2, _col1"}},{"Partition table definition":{"partition by:":"_col2, _col1","name:":"noop","order by:":"_col2, _col1"}}] + Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col2"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col2"}}] + Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:p_mfgr (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:+ + Statistics:Num rows: 
26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_name (type: string), p_size (type: int) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr + order by p_mfgr) + ) + partition by p_mfgr,p_name + order by p_mfgr,p_name) + partition by p_mfgr + order by p_mfgr ) +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 5 + File Output Operator [FS_15] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_13] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_12] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_11] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col5 (type: int) + PTF Operator [PTF_9] + Function definitions:[{"Input definition":{"type:":"PTFCOMPONENT"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col2"}}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_8] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col2 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:+ + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col1 (type: string), _col5 (type: int) + PTF Operator [PTF_6] + Function 
definitions:[{"Input definition":{"type:":"PTFCOMPONENT"}},{"Partition table definition":{"partition by:":"_col2, _col1","name:":"noop","order by:":"_col2, _col1"}}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_5] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col5 (type: int) + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col2"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col2"}}] + Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:p_mfgr (type: string) + Map-reduce partition columns:p_mfgr (type: string) + sort order:+ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_name (type: string), p_size (type: int) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, +rank() over (partition by p_mfgr order by p_name) as r, +dense_rank() over (partition by p_mfgr order by p_name) as dr, +p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 +from noop(on + noop(on + noop(on + noop(on part + partition by p_mfgr,p_name + order by p_mfgr,p_name) + ) + partition by p_mfgr + order by p_mfgr)) +POSTHOOK: type: QUERY +Plan not optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_12] + compressed:true + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_10] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + PTF Operator [PTF_9] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col2","name:":"windowingtablefunction","order by:":"_col1"}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_8] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col2 (type: string), _col1 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions:_col5 (type: int) + PTF Operator [PTF_6] + Function definitions:[{"Input definition":{"type:":"PTFCOMPONENT"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col2"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col2"}}] + Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_5] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_4] + key expressions:_col2 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:+ + Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: string), _col5 (type: int) + PTF Operator [PTF_3] + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2, _col1","name:":"noop","order by:":"_col2, _col1"}},{"Partition table definition":{"partition by:":"_col2, _col1","name:":"noop","order by:":"_col2, _col1"}}] + Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_2] + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_1] + key expressions:p_mfgr (type: string), p_name (type: string) + Map-reduce partition columns:p_mfgr (type: string), p_name (type: string) + sort order:++ + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:p_size (type: int) + TableScan [TS_0] + alias:part + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select distinct src.* from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select distinct src.* from src +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_8] + compressed:true + Statistics:Num rows: 250 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_6] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator [GBY_4] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:rand() (type: double) + sort order:++ + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_1] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_0] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select explode(array('a', 'b')) +PREHOOK: type: QUERY +POSTHOOK: query: explain select explode(array('a', 'b')) +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Others]. 
+ +Stage-0 + Fetch Operator + limit:-1 + UDTF Operator [UDTF_2] + function name:explode + Select Operator [SEL_1] + outputColumnNames:["_col0"] + TableScan [TS_0] + alias:_dummy_table +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: CREATE TABLE T4(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T4 +POSTHOOK: query: CREATE TABLE T4(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T4 +PREHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t4 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t4 +PREHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 select src1.key, src2.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 select src1.key, src2.value 
+POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-3 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest_j1","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 2 + File Output Operator [FS_11] + compressed:true + Statistics:Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest_j1","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_9] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_16] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2"] + | Statistics:Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: string) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_14] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:src1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_3] + outputColumnNames:["_col0"] + Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_15] + predicate:key is not null (type: boolean) + Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_2] + alias:src1 + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 select src1.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 select src1.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select /*+ STREAMTABLE(a) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +PREHOOK: type: 
QUERY +POSTHOOK: query: explain +select /*+ STREAMTABLE(a) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Hint]. + +Vertex dependency in root stage +Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 + File Output Operator [FS_14] + compressed:true + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_13] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator [MAPJOIN_31] + | condition map:[{"":"Inner Join 0 to 1"},{"":"Inner Join 1 to 2"},{"":"Inner Join 2 to 3"}] + | keys:{"Map 1":"key (type: string)","Map 2":"key (type: string)","Map 3":"key (type: string)","Map 4":"key (type: string)"} + | outputColumnNames:["_col0","_col1","_col5","_col6","_col10","_col11","_col15","_col16"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_11] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | value expressions:val (type: string) + | Filter Operator [FIL_30] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | TableScan [TS_3] + | alias:d + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-Map 3 [BROADCAST_EDGE] + | Reduce Output Operator [RS_9] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | value expressions:val (type: string) + | Filter Operator [FIL_29] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | TableScan [TS_2] + | alias:c + | Statistics:Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + |<-Map 2 [BROADCAST_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | value expressions:val (type: string) + | Filter Operator [FIL_28] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | TableScan [TS_1] + | alias:b + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-Filter Operator [FIL_27] + predicate:key is not null (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan [TS_0] + alias:a + Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE +PREHOOK: query: explain +select /*+ STREAMTABLE(a,c) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+ STREAMTABLE(a,c) */ * +FROM T1 a JOIN T2 b ON a.key = b.key 
+ JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Hint]. + +Vertex dependency in root stage +Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 + File Output Operator [FS_14] + compressed:true + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_13] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator [MAPJOIN_31] + | condition map:[{"":"Inner Join 0 to 1"},{"":"Inner Join 1 to 2"},{"":"Inner Join 2 to 3"}] + | keys:{"Map 1":"key (type: string)","Map 2":"key (type: string)","Map 3":"key (type: string)","Map 4":"key (type: string)"} + | outputColumnNames:["_col0","_col1","_col5","_col6","_col10","_col11","_col15","_col16"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_11] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | value expressions:val (type: string) + | Filter Operator [FIL_30] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | TableScan [TS_3] + | alias:d + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-Map 3 [BROADCAST_EDGE] + | Reduce Output Operator [RS_9] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | value expressions:val (type: string) + | Filter Operator [FIL_29] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | TableScan [TS_2] + | alias:c + | Statistics:Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + |<-Map 2 [BROADCAST_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | value expressions:val (type: string) + | Filter Operator [FIL_28] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | TableScan [TS_1] + | alias:b + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-Filter Operator [FIL_27] + predicate:key is not null (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan [TS_0] + alias:a + Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE +PREHOOK: query: explain FROM T1 a JOIN src c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) +PREHOOK: type: QUERY +POSTHOOK: query: explain FROM T1 a JOIN src c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing 
feature [Hint]. + +Vertex dependency in root stage +Reducer 3 <- Map 2 (SIMPLE_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_13] + compressed:true + Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_11] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + sort order: + Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + Group By Operator [GBY_9] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + |<-Map 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + Map-reduce partition columns:rand() (type: double) + sort order: + Statistics:Num rows: 275 Data size: 23925 Basic stats: COMPLETE Column stats: NONE + value expressions:hash(_col0) (type: int), hash(_col1) (type: int), hash(_col5) (type: int) + Map Join Operator [MAPJOIN_18] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 1":"UDFToDouble(key) (type: double)","Map 2":"(key + 1) (type: double)"} + | outputColumnNames:["_col0","_col1","_col5"] + | Statistics:Num rows: 275 Data size: 23925 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [BROADCAST_EDGE] + | Reduce Output Operator [RS_3] + | key expressions:UDFToDouble(key) (type: double) + | Map-reduce partition columns:UDFToDouble(key) (type: double) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | value expressions:key (type: string), val (type: string) + | Filter Operator [FIL_16] + | predicate:UDFToDouble(key) is not null (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-Filter Operator [FIL_17] + predicate:(key + 1) is not null (type: boolean) + Statistics:Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_1] + alias:c + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: FROM T1 a JOIN src c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: FROM T1 a JOIN src c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +198 6274 194 +PREHOOK: query: explain +select * FROM +(select src.* FROM src) x +JOIN +(select src.* FROM src) Y +ON (x.key = Y.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * FROM +(select src.* FROM src) x +JOIN +(select src.* FROM src) Y +ON (x.key = Y.key) 
+POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_10] + compressed:true + Statistics:Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_15] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1219 Data size: 433964 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col1 (type: string) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_13] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:src + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_7] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col1 (type: string) + Select Operator [SEL_3] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_14] + predicate:key is not null (type: boolean) + Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_2] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE +PREHOOK: query: explain select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.val +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.val +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Hint]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Map 1 <- Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_13] + compressed:true + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_11] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + sort order: + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint), _col1 (type: bigint) + Group By Operator [GBY_9] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + Map-reduce partition columns:rand() (type: double) + sort order: + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions:hash(_col0) (type: int), hash(_col6) (type: int) + Map Join Operator [MAPJOIN_18] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 1":"key (type: string)","Map 4":"val (type: string)"} + | outputColumnNames:["_col0","_col6"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:val (type: string) + | Map-reduce partition columns:val (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | Filter Operator [FIL_17] + | predicate:val is not null (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | TableScan [TS_1] + | alias:v + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-Filter Operator [FIL_16] + predicate:key is not null (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan [TS_0] + alias:k + Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE +PREHOOK: query: explain select sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing statistics. Please check log for more details. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Map 1 <- Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_13] + compressed:true + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_11] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + sort order: + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint), _col1 (type: bigint) + Group By Operator [GBY_9] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + Map-reduce partition columns:rand() (type: double) + sort order: + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions:hash(_col0) (type: int), hash(_col6) (type: int) + Map Join Operator [MAPJOIN_18] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 1":"key (type: string)","Map 4":"key (type: string)"} + | outputColumnNames:["_col0","_col6"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | value expressions:val (type: string) + | Filter Operator [FIL_17] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | TableScan [TS_1] + | alias:v + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-Filter Operator [FIL_16] + predicate:key is not null (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan [TS_0] + alias:k + Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE +PREHOOK: query: explain select count(1) from T1 a join T1 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1) from T1 a join T1 b on a.key = b.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing statistics. Please check log for more details. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Map 1 <- Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_13] + compressed:true + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_11] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_9] + | aggregations:["count(1)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + Map-reduce partition columns:rand() (type: double) + sort order: + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map Join Operator [MAPJOIN_18] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 1":"key (type: string)","Map 4":"key (type: string)"} + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | Filter Operator [FIL_17] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + | TableScan [TS_1] + | alias:b + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-Filter Operator [FIL_16] + predicate:key is not null (type: boolean) + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan [TS_0] + alias:a + Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE +PREHOOK: query: explain FROM T1 a LEFT OUTER JOIN T2 c ON c.key+1=a.key select sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) +PREHOOK: type: QUERY +POSTHOOK: query: explain FROM T1 a LEFT OUTER JOIN T2 c ON c.key+1=a.key select sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing statistics. Please check log for more details. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Map 1 <- Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_11] + compressed:true + Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_9] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + sort order: + Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + Group By Operator [GBY_7] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_6] + Map-reduce partition columns:rand() (type: double) + sort order: + Statistics:Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + value expressions:hash(_col0) (type: int), hash(_col1) (type: int), hash(_col5) (type: int) + Map Join Operator [MAPJOIN_14] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"Map 1":"UDFToDouble(key) (type: double)","Map 4":"(key + 1) (type: double)"} + | outputColumnNames:["_col0","_col1","_col5"] + | Statistics:Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_3] + | key expressions:(key + 1) (type: double) + | Map-reduce partition columns:(key + 1) (type: double) + | sort order:+ + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + | value expressions:key (type: string) + | TableScan [TS_1] + | alias:c + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-TableScan [TS_0] + alias:a + Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE +PREHOOK: query: explain FROM T1 a RIGHT OUTER JOIN T2 c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) +PREHOOK: type: QUERY +POSTHOOK: query: explain FROM T1 a RIGHT OUTER JOIN T2 c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Hint]. 
+ +Vertex dependency in root stage +Reducer 3 <- Map 2 (SIMPLE_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_11] + compressed:true + Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_9] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + sort order: + Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + Group By Operator [GBY_7] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + |<-Map 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_6] + Map-reduce partition columns:rand() (type: double) + sort order: + Statistics:Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + value expressions:hash(_col0) (type: int), hash(_col1) (type: int), hash(_col5) (type: int) + Map Join Operator [MAPJOIN_14] + | condition map:[{"":"Right Outer Join0 to 1"}] + | keys:{"Map 1":"UDFToDouble(key) (type: double)","Map 2":"(key + 1) (type: double)"} + | outputColumnNames:["_col0","_col1","_col5"] + | Statistics:Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + |<-Map 1 [BROADCAST_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:UDFToDouble(key) (type: double) + | Map-reduce partition columns:UDFToDouble(key) (type: double) + | sort order:+ + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + | value expressions:key (type: string), val (type: string) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-TableScan [TS_1] + alias:c + Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE +PREHOOK: query: explain FROM T1 a FULL OUTER JOIN T2 c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) +PREHOOK: type: QUERY +POSTHOOK: query: explain FROM T1 a FULL OUTER JOIN T2 c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Hint]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_11] + compressed:true + Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_9] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + sort order: + Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + Group By Operator [GBY_7] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_6] + Map-reduce partition columns:rand() (type: double) + sort order: + Statistics:Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + value expressions:hash(_col0) (type: int), hash(_col1) (type: int), hash(_col5) (type: int) + Merge Join Operator [MERGEJOIN_12] + | condition map:[{"":"Outer Join 0 to 1"}] + | keys:{"1":"(key + 1) (type: double)","0":"UDFToDouble(key) (type: double)"} + | outputColumnNames:["_col0","_col1","_col5"] + | Statistics:Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_2] + | key expressions:UDFToDouble(key) (type: double) + | Map-reduce partition columns:UDFToDouble(key) (type: double) + | sort order:+ + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + | value expressions:key (type: string), val (type: string) + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-Map 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_3] + key expressions:(key + 1) (type: double) + Map-reduce partition columns:(key + 1) (type: double) + sort order:+ + Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions:key (type: string) + TableScan [TS_1] + alias:c + Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE +PREHOOK: query: explain select /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k left outer join T1 v on k.key+1=v.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k left outer join T1 v on k.key+1=v.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Hint]. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Map 1 <- Map 4 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 + File Output Operator [FS_11] + compressed:true + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_9] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_8] + sort order: + Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint), _col1 (type: bigint) + Group By Operator [GBY_7] + | aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + Reduce Output Operator [RS_6] + Map-reduce partition columns:rand() (type: double) + sort order: + Statistics:Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + value expressions:hash(_col0) (type: int), hash(_col6) (type: int) + Map Join Operator [MAPJOIN_14] + | condition map:[{"":"Left Outer Join0 to 1"}] + | keys:{"Map 1":"(key + 1) (type: double)","Map 4":"UDFToDouble(key) (type: double)"} + | outputColumnNames:["_col0","_col6"] + | Statistics:Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_3] + | key expressions:UDFToDouble(key) (type: double) + | Map-reduce partition columns:UDFToDouble(key) (type: double) + | sort order:+ + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + | value expressions:val (type: string) + | TableScan [TS_1] + | alias:v + | Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + |<-TableScan [TS_0] + alias:k + Statistics:Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Index: ql/src/test/results/clientpositive/tez/explainuser_2.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/explainuser_2.q.out (revision 0) +++ ql/src/test/results/clientpositive/tez/explainuser_2.q.out (working copy) @@ -0,0 +1,4626 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: CREATE TABLE ss(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ss +POSTHOOK: query: CREATE TABLE ss(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ss +PREHOOK: query: CREATE TABLE sr(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@sr +POSTHOOK: query: CREATE TABLE sr(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sr +PREHOOK: query: CREATE TABLE cs(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@cs +POSTHOOK: query: CREATE TABLE cs(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cs +PREHOOK: query: INSERT OVERWRITE TABLE ss +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@ss +POSTHOOK: query: INSERT OVERWRITE TABLE ss +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@ss +POSTHOOK: Lineage: ss.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE sr +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=12) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@sr +POSTHOOK: query: INSERT OVERWRITE TABLE sr +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=12) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@sr +POSTHOOK: Lineage: sr.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sr.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sr.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sr.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sr.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sr.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE cs 
+SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@cs +POSTHOOK: query: INSERT OVERWRITE TABLE cs +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@cs +POSTHOOK: Lineage: cs.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: cs.k2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: cs.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: cs.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: cs.v2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: cs.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ANALYZE TABLE ss COMPUTE STATISTICS +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +PREHOOK: Output: default@ss +POSTHOOK: query: ANALYZE TABLE ss COMPUTE STATISTICS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +POSTHOOK: Output: default@ss +PREHOOK: query: ANALYZE TABLE ss COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE ss COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE sr COMPUTE STATISTICS +PREHOOK: type: QUERY +PREHOOK: Input: default@sr +PREHOOK: Output: default@sr +POSTHOOK: query: ANALYZE TABLE sr COMPUTE STATISTICS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sr +POSTHOOK: Output: default@sr +PREHOOK: query: ANALYZE TABLE sr COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 +PREHOOK: type: QUERY +PREHOOK: Input: default@sr +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE sr COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sr +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE cs COMPUTE STATISTICS +PREHOOK: type: QUERY +PREHOOK: Input: default@cs +PREHOOK: Output: default@cs +POSTHOOK: query: ANALYZE TABLE cs COMPUTE STATISTICS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cs +POSTHOOK: Output: default@cs +PREHOOK: query: ANALYZE TABLE cs COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 +PREHOOK: type: QUERY +PREHOOK: Input: default@cs +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE cs COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cs +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT x.key, z.value, y.value +FROM 
src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_19] + compressed:false + Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_18] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_29] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_14] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_25] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_16] + key expressions:_col3 (type: string) + Map-reduce partition columns:_col3 (type: string) + sort order:+ + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: string) + Merge Join Operator [MERGEJOIN_28] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col3"] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_8] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_4] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_26] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_2] + | alias:z + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_10] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) + sort order:+ + Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: string) + Select Operator [SEL_6] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column 
stats: NONE + Filter Operator [FIL_27] + predicate:(value is not null and key is not null) (type: boolean) + Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_5] + alias:x + Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: EXPLAIN +select +ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3) +FROM +ss,sr,cs,src d1,src d2,src d3,src1,srcpart +where + ss.k1 = d1.key +and sr.k1 = d2.key +and cs.k1 = d3.key +and ss.k2 = sr.k2 +and ss.k3 = sr.k3 +and ss.v1 = src1.value +and ss.v2 = srcpart.value +and sr.v2 = cs.v2 +and sr.v3 = cs.v3 +and ss.v3='ssv3' +and sr.v1='srv1' +and src1.key = 'src1key' +and srcpart.key = 'srcpartkey' +and d1.value = 'd1value' +and d2.value in ('2000Q1','2000Q2','2000Q3') +and d3.value in ('2000Q1','2000Q2','2000Q3') +group by +ss.k1,sr.k2,cs.k3 +order by +ss.k1,sr.k2,cs.k3 +limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +select +ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3) +FROM +ss,sr,cs,src d1,src d2,src d3,src1,srcpart +where + ss.k1 = d1.key +and sr.k1 = d2.key +and cs.k1 = d3.key +and ss.k2 = sr.k2 +and ss.k3 = sr.k3 +and ss.v1 = src1.value +and ss.v2 = srcpart.value +and sr.v2 = cs.v2 +and sr.v3 = cs.v3 +and ss.v3='ssv3' +and sr.v1='srv1' +and src1.key = 'src1key' +and srcpart.key = 'srcpartkey' +and d1.value = 'd1value' +and d2.value in ('2000Q1','2000Q2','2000Q3') +and d3.value in ('2000Q1','2000Q2','2000Q3') +group by +ss.k1,sr.k2,cs.k3 +order by +ss.k1,sr.k2,cs.k3 +limit 100 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 9 <- Reducer 16 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 + File Output Operator [FS_71] + compressed:false + Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Limit [LIM_70] + Number of rows:100 + Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_69] + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_68] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) + Group By Operator [GBY_66] + | aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + 
Reduce Output Operator [RS_65] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) + Group By Operator [GBY_64] + aggregations:["count(_col3)","count(_col4)","count(_col5)"] + keys:_col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_62] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_113] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col15 (type: string), _col17 (type: string)","0":"_col1 (type: string), _col3 (type: string)"} + | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"] + | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_58] + | key expressions:_col1 (type: string), _col3 (type: string) + | Map-reduce partition columns:_col1 (type: string), _col3 (type: string) + | sort order:++ + | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col2 (type: string) + | Merge Join Operator [MERGEJOIN_107] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3"] + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_53] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_99] + | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_0] + | | alias:cs + | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE + | |<-Map 6 [SIMPLE_EDGE] + | Reduce Output Operator [RS_55] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_4] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_100] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_2] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_60] + 
key expressions:_col15 (type: string), _col17 (type: string) + Map-reduce partition columns:_col15 (type: string), _col17 (type: string) + sort order:++ + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + value expressions:_col6 (type: string), _col7 (type: string), _col14 (type: string) + Select Operator [SEL_51] + outputColumnNames:["_col14","_col15","_col17","_col6","_col7"] + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_112] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col2 (type: string), _col4 (type: string)","0":"_col8 (type: string), _col10 (type: string)"} + | outputColumnNames:["_col6","_col7","_col14","_col15","_col17"] + | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 16 [SIMPLE_EDGE] + | Reduce Output Operator [RS_49] + | key expressions:_col2 (type: string), _col4 (type: string) + | Map-reduce partition columns:_col2 (type: string), _col4 (type: string) + | sort order:++ + | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col3 (type: string), _col5 (type: string) + | Merge Join Operator [MERGEJOIN_111] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | |<-Map 15 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_36] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + | | Select Operator [SEL_31] + | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_105] + | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_29] + | | alias:sr + | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + | |<-Map 17 [SIMPLE_EDGE] + | Reduce Output Operator [RS_38] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_34] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_106] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_32] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_47] + key expressions:_col8 (type: string), _col10 (type: string) + Map-reduce partition columns:_col8 (type: string), _col10 (type: string) + sort order:++ + Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + value expressions:_col6 (type: string), 
_col7 (type: string) + Merge Join Operator [MERGEJOIN_110] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col5 (type: string)","0":"_col1 (type: string)"} + | outputColumnNames:["_col6","_col7","_col8","_col10"] + | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + |<-Map 7 [SIMPLE_EDGE] + | Reduce Output Operator [RS_42] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_7] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_101] + | predicate:((key = 'src1key') and value is not null) (type: boolean) + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_5] + | alias:src1 + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 11 [SIMPLE_EDGE] + Reduce Output Operator [RS_44] + key expressions:_col5 (type: string) + Map-reduce partition columns:_col5 (type: string) + sort order:+ + Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + value expressions:_col4 (type: string), _col6 (type: string), _col8 (type: string) + Merge Join Operator [MERGEJOIN_109] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col4","_col5","_col6","_col8"] + | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + |<-Map 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_24] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_102] + | predicate:((value = 'd1value') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_8] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 13 [SIMPLE_EDGE] + Reduce Output Operator [RS_26] + key expressions:_col2 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:+ + Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions:_col3 (type: string), _col4 (type: string), _col6 (type: string) + Merge Join Operator [MERGEJOIN_108] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col3 (type: string)","0":"_col1 (type: string)"} + | outputColumnNames:["_col2","_col3","_col4","_col6"] + | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + |<-Map 12 [SIMPLE_EDGE] + | Reduce Output Operator [RS_18] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_13] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_103] + | predicate:((key = 'srcpartkey') and value is not null) (type: boolean) + | Statistics:Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE + | TableScan [TS_11] + | alias:srcpart + | Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + |<-Map 14 [SIMPLE_EDGE] + Reduce Output Operator [RS_20] + key expressions:_col3 (type: string) + Map-reduce partition columns:_col3 (type: string) + sort order:+ + Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) + Select Operator [SEL_16] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_104] + predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) + Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_14] + alias:ss + Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 13 <- Union 12 (SIMPLE_EDGE) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 11 <- Union 12 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 7 <- Union 2 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Map 15 <- Union 12 (CONTAINS) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 + File Output Operator [FS_61] + compressed:false + Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_59] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + |<-Union 5 [SIMPLE_EDGE] + |<-Reducer 14 [CONTAINS] + | Reduce Output Operator [RS_58] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_57] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_53] + | outputColumnNames:["_col0","_col1"] + | Merge Join Operator [MERGEJOIN_85] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col0","_col2"] + | |<-Reducer 13 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_49] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_37] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_36] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | | |<-Union 12 [SIMPLE_EDGE] + | | |<-Map 11 [CONTAINS] + | | | Reduce Output Operator [RS_35] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_34] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_28] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_78] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_27] + | | | alias:x + | | |<-Map 15 [CONTAINS] + | | Reduce Output Operator [RS_35] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_34] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_30] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_79] + | | predicate:value is not 
null (type: boolean) + | | TableScan [TS_29] + | | alias:y + | |<-Reducer 17 [SIMPLE_EDGE] + | Reduce Output Operator [RS_51] + | key expressions:_col2 (type: string) + | Map-reduce partition columns:_col2 (type: string) + | sort order:+ + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Merge Join Operator [MERGEJOIN_83] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2"] + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | |<-Map 16 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_43] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_39] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_80] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_38] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map 18 [SIMPLE_EDGE] + | Reduce Output Operator [RS_45] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_41] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_81] + | predicate:(key is not null and value is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_40] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [CONTAINS] + Reduce Output Operator [RS_58] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_57] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_26] + outputColumnNames:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_84] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col2"] + |<-Reducer 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_22] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_9] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | |<-Union 2 [SIMPLE_EDGE] + | |<-Map 1 [CONTAINS] + | | Reduce Output Operator [RS_8] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: 
string) + | | sort order:++ + | | Group By Operator [GBY_7] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_74] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_0] + | | alias:x + | |<-Map 7 [CONTAINS] + | Reduce Output Operator [RS_8] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_7] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_75] + | predicate:value is not null (type: boolean) + | TableScan [TS_2] + | alias:y + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_24] + key expressions:_col2 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:+ + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: string) + Merge Join Operator [MERGEJOIN_82] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2"] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Map 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_18] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_14] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_77] + | predicate:(key is not null and value is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_13] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Map 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_16] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_12] + outputColumnNames:["_col0"] + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_76] + predicate:key is not null (type: boolean) + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_11] + alias:y + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN 
(select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value) +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 31 <- Reducer 30 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) +Map 24 <- Union 25 (CONTAINS) +Map 32 <- Union 25 (CONTAINS) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 30 <- Union 29 (SIMPLE_EDGE) +Map 13 <- Union 14 (CONTAINS) +Map 34 <- Union 29 (CONTAINS) +Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) +Map 1 <- Union 2 (CONTAINS) +Map 20 <- Union 16 (CONTAINS) +Map 33 <- Union 27 (CONTAINS) +Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 19 <- Union 14 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 26 <- Union 25 (SIMPLE_EDGE), Union 27 (CONTAINS) +Reducer 17 <- Union 16 (SIMPLE_EDGE) +Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 28 <- Union 27 (SIMPLE_EDGE), Union 29 (CONTAINS) +Reducer 15 <- Union 14 (SIMPLE_EDGE), Union 16 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Map 9 <- Union 2 (CONTAINS) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 8 + File Output Operator [FS_122] + compressed:false + Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_120] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + |<-Union 7 [SIMPLE_EDGE] + |<-Reducer 31 [CONTAINS] + | Reduce Output Operator [RS_119] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_118] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_114] + | outputColumnNames:["_col0","_col1"] + | Merge Join Operator [MERGEJOIN_164] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col2","_col3"] + | |<-Reducer 30 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_110] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_98] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_97] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 220 Data size: 2332 Basic 
stats: COMPLETE Column stats: NONE + | | |<-Union 29 [SIMPLE_EDGE] + | | |<-Map 34 [CONTAINS] + | | | Reduce Output Operator [RS_96] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_95] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_91] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_156] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_90] + | | | alias:y + | | |<-Reducer 28 [CONTAINS] + | | Reduce Output Operator [RS_96] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_95] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Group By Operator [GBY_88] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | |<-Union 27 [SIMPLE_EDGE] + | | |<-Map 33 [CONTAINS] + | | | Reduce Output Operator [RS_87] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_86] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_82] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_155] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_81] + | | | alias:y + | | |<-Reducer 26 [CONTAINS] + | | Reduce Output Operator [RS_87] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_86] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Group By Operator [GBY_79] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | |<-Union 25 [SIMPLE_EDGE] + | | |<-Map 24 [CONTAINS] + | | | Reduce Output Operator [RS_78] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_77] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_71] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_153] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_70] + | | | alias:x + | | |<-Map 32 [CONTAINS] + | | Reduce Output Operator [RS_78] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_77] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_73] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_154] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_72] + | | alias:y + | |<-Reducer 36 [SIMPLE_EDGE] + | Reduce Output Operator [RS_112] + | key expressions:_col3 (type: string) + | Map-reduce partition columns:_col3 
(type: string) + | sort order:+ + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string), _col2 (type: string) + | Merge Join Operator [MERGEJOIN_161] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3"] + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | |<-Map 35 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_104] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_100] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_157] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_99] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map 37 [SIMPLE_EDGE] + | Reduce Output Operator [RS_106] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_102] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_158] + | predicate:(key is not null and value is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_101] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 6 [CONTAINS] + Reduce Output Operator [RS_119] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_118] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Group By Operator [GBY_68] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + |<-Union 5 [SIMPLE_EDGE] + |<-Reducer 4 [CONTAINS] + | Reduce Output Operator [RS_67] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_66] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_26] + | outputColumnNames:["_col0","_col1"] + | Merge Join Operator [MERGEJOIN_162] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col2","_col3"] + | |<-Reducer 11 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_24] + | | key expressions:_col3 (type: string) + | | Map-reduce partition columns:_col3 (type: string) + | | sort order:+ + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string) + | | Merge Join Operator [MERGEJOIN_159] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"1":"_col0 
(type: string)","0":"_col0 (type: string)"} + | | | outputColumnNames:["_col1","_col2","_col3"] + | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 10 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_16] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: string) + | | | Select Operator [SEL_12] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_146] + | | | predicate:key is not null (type: boolean) + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_11] + | | | alias:y + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 12 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_18] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_14] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_147] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_13] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_22] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_9] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | |<-Union 2 [SIMPLE_EDGE] + | |<-Map 1 [CONTAINS] + | | Reduce Output Operator [RS_8] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_7] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_144] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_0] + | | alias:x + | |<-Map 9 [CONTAINS] + | Reduce Output Operator [RS_8] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_7] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_145] + | predicate:value is not null (type: boolean) + | TableScan [TS_2] + | alias:y + |<-Reducer 18 [CONTAINS] + Reduce Output Operator [RS_67] + 
key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_66] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_62] + outputColumnNames:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_163] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col2","_col3"] + |<-Reducer 17 [SIMPLE_EDGE] + | Reduce Output Operator [RS_58] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_46] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_45] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | |<-Union 16 [SIMPLE_EDGE] + | |<-Map 20 [CONTAINS] + | | Reduce Output Operator [RS_44] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_43] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_39] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_150] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_38] + | | alias:y + | |<-Reducer 15 [CONTAINS] + | Reduce Output Operator [RS_44] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_43] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Group By Operator [GBY_36] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | |<-Union 14 [SIMPLE_EDGE] + | |<-Map 13 [CONTAINS] + | | Reduce Output Operator [RS_35] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_34] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_28] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_148] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_27] + | | alias:x + | |<-Map 19 [CONTAINS] + | Reduce Output Operator [RS_35] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_34] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_30] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_149] + | predicate:value is not null (type: boolean) + | TableScan [TS_29] + | alias:y + |<-Reducer 22 [SIMPLE_EDGE] + Reduce Output Operator [RS_60] + key expressions:_col3 (type: string) + Map-reduce partition columns:_col3 (type: string) + sort order:+ + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: 
NONE + value expressions:_col1 (type: string), _col2 (type: string) + Merge Join Operator [MERGEJOIN_160] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3"] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Map 21 [SIMPLE_EDGE] + | Reduce Output Operator [RS_52] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_48] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_151] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_47] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map 23 [SIMPLE_EDGE] + Reduce Output Operator [RS_54] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: string) + Select Operator [SEL_50] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_152] + predicate:(key is not null and value is not null) (type: boolean) + Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_49] + alias:x + Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: EXPLAIN +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 2 + File Output Operator [FS_19] + compressed:false + Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_18] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_29] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col3 (type: string)"} + | outputColumnNames:["_col1","_col2","_col5"] + | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [BROADCAST_EDGE] + | Reduce Output Operator [RS_14] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_25] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_28] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 2":"_col0 (type: string)","Map 3":"_col1 (type: string)"} + | outputColumnNames:["_col0","_col3"] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [BROADCAST_EDGE] + | Reduce Output Operator [RS_10] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string) + | Select Operator [SEL_6] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_27] + | predicate:(value is not null and key is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_5] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_4] + outputColumnNames:["_col0"] + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_26] + predicate:value is not null (type: boolean) + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:z + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: EXPLAIN +select +ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3) +FROM +ss,sr,cs,src d1,src d2,src d3,src1,srcpart +where + ss.k1 = d1.key +and sr.k1 = d2.key +and cs.k1 = d3.key +and ss.k2 = sr.k2 +and ss.k3 = sr.k3 +and ss.v1 = src1.value +and ss.v2 = srcpart.value +and sr.v2 = cs.v2 +and sr.v3 = cs.v3 +and ss.v3='ssv3' +and sr.v1='srv1' +and src1.key = 'src1key' +and srcpart.key = 'srcpartkey' +and d1.value = 
'd1value' +and d2.value in ('2000Q1','2000Q2','2000Q3') +and d3.value in ('2000Q1','2000Q2','2000Q3') +group by +ss.k1,sr.k2,cs.k3 +order by +ss.k1,sr.k2,cs.k3 +limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +select +ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3) +FROM +ss,sr,cs,src d1,src d2,src d3,src1,srcpart +where + ss.k1 = d1.key +and sr.k1 = d2.key +and cs.k1 = d3.key +and ss.k2 = sr.k2 +and ss.k3 = sr.k3 +and ss.v1 = src1.value +and ss.v2 = srcpart.value +and sr.v2 = cs.v2 +and sr.v3 = cs.v3 +and ss.v3='ssv3' +and sr.v1='srv1' +and src1.key = 'src1key' +and srcpart.key = 'srcpartkey' +and d1.value = 'd1value' +and d2.value in ('2000Q1','2000Q2','2000Q3') +and d3.value in ('2000Q1','2000Q2','2000Q3') +group by +ss.k1,sr.k2,cs.k3 +order by +ss.k1,sr.k2,cs.k3 +limit 100 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Map 2 <- Map 1 (BROADCAST_EDGE) +Map 10 <- Map 9 (BROADCAST_EDGE) +Map 5 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 + File Output Operator [FS_71] + compressed:false + Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Limit [LIM_70] + Number of rows:100 + Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_69] + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_68] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) + Group By Operator [GBY_66] + | aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + |<-Map 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_65] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) + Group By Operator [GBY_64] + aggregations:["count(_col3)","count(_col4)","count(_col5)"] + keys:_col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_62] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_113] + | condition map:[{"":"Inner Join 0 to 
1"}] + | keys:{"Map 2":"_col1 (type: string), _col3 (type: string)","Map 5":"_col15 (type: string), _col17 (type: string)"} + | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"] + | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + |<-Map 2 [BROADCAST_EDGE] + | Reduce Output Operator [RS_58] + | key expressions:_col1 (type: string), _col3 (type: string) + | Map-reduce partition columns:_col1 (type: string), _col3 (type: string) + | sort order:++ + | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col2 (type: string) + | Map Join Operator [MAPJOIN_107] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3"] + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | |<-Map 1 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_53] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_99] + | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_0] + | | alias:cs + | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_4] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_100] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_2] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_51] + outputColumnNames:["_col14","_col15","_col17","_col6","_col7"] + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_112] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 10":"_col2 (type: string), _col4 (type: string)","Map 5":"_col8 (type: string), _col10 (type: string)"} + | outputColumnNames:["_col6","_col7","_col14","_col15","_col17"] + | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + |<-Map 10 [BROADCAST_EDGE] + | Reduce Output Operator [RS_49] + | key expressions:_col2 (type: string), _col4 (type: string) + | Map-reduce partition columns:_col2 (type: string), _col4 (type: string) + | sort order:++ + | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col3 (type: string), _col5 (type: string) + | Map Join Operator [MAPJOIN_111] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 10":"_col0 (type: string)","Map 9":"_col0 (type: string)"} + | | outputColumnNames:["_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | |<-Map 9 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_36] + | 
| key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + | | Select Operator [SEL_31] + | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_105] + | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_29] + | | alias:sr + | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_34] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_106] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_32] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_110] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 3":"_col1 (type: string)","Map 5":"_col5 (type: string)"} + | outputColumnNames:["_col6","_col7","_col8","_col10"] + | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [BROADCAST_EDGE] + | Reduce Output Operator [RS_42] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_7] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_101] + | predicate:((key = 'src1key') and value is not null) (type: boolean) + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_5] + | alias:src1 + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_109] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 5":"_col2 (type: string)","Map 4":"_col0 (type: string)"} + | outputColumnNames:["_col4","_col5","_col6","_col8"] + | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_24] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_102] + | predicate:((value = 'd1value') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_8] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_108] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 5":"_col1 (type: string)","Map 8":"_col3 (type: string)"} + | 
outputColumnNames:["_col2","_col3","_col4","_col6"] + | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + |<-Map 8 [BROADCAST_EDGE] + | Reduce Output Operator [RS_20] + | key expressions:_col3 (type: string) + | Map-reduce partition columns:_col3 (type: string) + | sort order:+ + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) + | Select Operator [SEL_16] + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_104] + | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_14] + | alias:ss + | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_13] + outputColumnNames:["_col1"] + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_103] + predicate:((key = 'srcpartkey') and value is not null) (type: boolean) + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_11] + alias:srcpart + Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 12 <- Union 10 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Reducer 11 <- Union 10 (SIMPLE_EDGE) +Map 13 <- Map 14 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE), Union 6 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) +Map 5 <- Map 8 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE), Union 6 (CONTAINS) +Map 4 <- Union 2 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE) +Map 9 <- Union 10 (CONTAINS) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 7 + File Output Operator [FS_61] + compressed:false + Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_59] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + |<-Union 6 [SIMPLE_EDGE] + |<-Map 13 [CONTAINS] + | Reduce Output Operator [RS_58] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_57] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_53] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_85] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Reducer 11":"_col0 (type: string)","Map 13":"_col2 (type: string)"} + | | outputColumnNames:["_col0","_col2"] + | |<-Reducer 11 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_49] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_37] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_36] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | | |<-Union 10 [SIMPLE_EDGE] + | | |<-Map 12 [CONTAINS] + | | | Reduce Output Operator [RS_35] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_34] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_30] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_79] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_29] + | | | alias:y + | | |<-Map 9 [CONTAINS] + | | Reduce Output Operator [RS_35] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_34] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_28] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_78] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_27] + | | alias:x + | |<-Map Join Operator [MAPJOIN_83] + | | 
condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 14":"_col0 (type: string)","Map 13":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2"] + | |<-Map 14 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_45] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_41] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_81] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_40] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_39] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_80] + | predicate:key is not null (type: boolean) + | TableScan [TS_38] + | alias:y + |<-Map 5 [CONTAINS] + Reduce Output Operator [RS_58] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_57] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_26] + outputColumnNames:["_col0","_col1"] + Map Join Operator [MAPJOIN_84] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Reducer 3":"_col0 (type: string)","Map 5":"_col2 (type: string)"} + | outputColumnNames:["_col0","_col2"] + |<-Reducer 3 [BROADCAST_EDGE] + | Reduce Output Operator [RS_22] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_9] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | |<-Union 2 [SIMPLE_EDGE] + | |<-Map 1 [CONTAINS] + | | Reduce Output Operator [RS_8] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_7] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_74] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_0] + | | alias:x + | |<-Map 4 [CONTAINS] + | Reduce Output Operator [RS_8] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_7] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_75] + | predicate:value is not null (type: boolean) + | TableScan [TS_2] + | alias:y + |<-Map Join Operator [MAPJOIN_82] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 5":"_col0 (type: string)","Map 8":"_col0 
(type: string)"} + | outputColumnNames:["_col1","_col2"] + |<-Map 8 [BROADCAST_EDGE] + | Reduce Output Operator [RS_18] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_14] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_77] + | predicate:(key is not null and value is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_13] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_12] + outputColumnNames:["_col0"] + Filter Operator [FIL_76] + predicate:key is not null (type: boolean) + TableScan [TS_11] + alias:y +PREHOOK: query: explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 22 <- Union 21 (SIMPLE_EDGE), Union 23 (CONTAINS) +Reducer 13 <- Union 12 (SIMPLE_EDGE), Union 14 (CONTAINS) +Map 30 <- Map 31 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Union 8 (CONTAINS) +Map 11 <- Union 12 (CONTAINS) +Reducer 24 <- Union 23 (SIMPLE_EDGE), Union 25 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) +Map 20 <- Union 21 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 9 <- Union 8 (SIMPLE_EDGE) +Reducer 26 <- Union 25 (SIMPLE_EDGE) +Map 16 <- Union 12 (CONTAINS) +Map 29 <- Union 25 (CONTAINS) +Map 28 <- Union 23 (CONTAINS) +Reducer 15 <- Union 14 (SIMPLE_EDGE) +Map 18 <- Map 19 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Union 6 (CONTAINS) +Map 27 <- Union 21 (CONTAINS) +Map 17 <- Union 14 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Map 5 <- Map 10 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE), Union 6 (CONTAINS) +Map 4 <- Union 2 (CONTAINS) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 9 + File Output Operator [FS_122] + compressed:false + Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_120] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + |<-Union 8 [SIMPLE_EDGE] + |<-Map 30 [CONTAINS] + | Reduce Output Operator [RS_119] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_118] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_114] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_164] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 30":"_col3 (type: string)","Reducer 26":"_col0 (type: string)"} + | | outputColumnNames:["_col2","_col3"] + | |<-Reducer 26 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_110] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_98] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_97] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | | |<-Union 25 [SIMPLE_EDGE] + | | |<-Reducer 24 [CONTAINS] + | | | Reduce Output Operator [RS_96] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_95] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Group By Operator [GBY_88] + | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | |<-Union 23 [SIMPLE_EDGE] + | | | |<-Reducer 22 [CONTAINS] + | | | | Reduce Output Operator [RS_87] + | | | | key 
expressions:_col0 (type: string), _col1 (type: string) + | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | | sort order:++ + | | | | Group By Operator [GBY_86] + | | | | keys:_col0 (type: string), _col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | | Group By Operator [GBY_79] + | | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | | | outputColumnNames:["_col0","_col1"] + | | | | |<-Union 21 [SIMPLE_EDGE] + | | | | |<-Map 20 [CONTAINS] + | | | | | Reduce Output Operator [RS_78] + | | | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | | | sort order:++ + | | | | | Group By Operator [GBY_77] + | | | | | keys:_col0 (type: string), _col1 (type: string) + | | | | | outputColumnNames:["_col0","_col1"] + | | | | | Select Operator [SEL_71] + | | | | | outputColumnNames:["_col0","_col1"] + | | | | | Filter Operator [FIL_153] + | | | | | predicate:value is not null (type: boolean) + | | | | | TableScan [TS_70] + | | | | | alias:x + | | | | |<-Map 27 [CONTAINS] + | | | | Reduce Output Operator [RS_78] + | | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | | sort order:++ + | | | | Group By Operator [GBY_77] + | | | | keys:_col0 (type: string), _col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | | Select Operator [SEL_73] + | | | | outputColumnNames:["_col0","_col1"] + | | | | Filter Operator [FIL_154] + | | | | predicate:value is not null (type: boolean) + | | | | TableScan [TS_72] + | | | | alias:y + | | | |<-Map 28 [CONTAINS] + | | | Reduce Output Operator [RS_87] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_86] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_82] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_155] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_81] + | | | alias:y + | | |<-Map 29 [CONTAINS] + | | Reduce Output Operator [RS_96] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_95] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_91] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_156] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_90] + | | alias:y + | |<-Map Join Operator [MAPJOIN_161] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 30":"_col0 (type: string)","Map 31":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3"] + | |<-Map 31 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_106] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_102] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter 
Operator [FIL_158] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_101] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_100] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_157] + | predicate:key is not null (type: boolean) + | TableScan [TS_99] + | alias:y + |<-Reducer 7 [CONTAINS] + Reduce Output Operator [RS_119] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_118] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Group By Operator [GBY_68] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + |<-Union 6 [SIMPLE_EDGE] + |<-Map 18 [CONTAINS] + | Reduce Output Operator [RS_67] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_66] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_62] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_163] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Reducer 15":"_col0 (type: string)","Map 18":"_col3 (type: string)"} + | | outputColumnNames:["_col2","_col3"] + | |<-Reducer 15 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_58] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_46] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_45] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | |<-Union 14 [SIMPLE_EDGE] + | | |<-Reducer 13 [CONTAINS] + | | | Reduce Output Operator [RS_44] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_43] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Group By Operator [GBY_36] + | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | |<-Union 12 [SIMPLE_EDGE] + | | | |<-Map 11 [CONTAINS] + | | | | Reduce Output Operator [RS_35] + | | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | | sort order:++ + | | | | Group By Operator [GBY_34] + | | | | keys:_col0 (type: string), _col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | | Select Operator [SEL_28] + | | | | outputColumnNames:["_col0","_col1"] + | | | | Filter Operator [FIL_148] + | | | | predicate:value is not null (type: boolean) + | | | | TableScan [TS_27] + | | | | alias:x + | | | |<-Map 16 [CONTAINS] + | | | Reduce Output Operator [RS_35] + | | | key expressions:_col0 (type: string), _col1 
(type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_34] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_30] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_149] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_29] + | | | alias:y + | | |<-Map 17 [CONTAINS] + | | Reduce Output Operator [RS_44] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_43] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_39] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_150] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_38] + | | alias:y + | |<-Map Join Operator [MAPJOIN_160] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 19":"_col0 (type: string)","Map 18":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3"] + | |<-Map 19 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_54] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_50] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_152] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_49] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_48] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_151] + | predicate:key is not null (type: boolean) + | TableScan [TS_47] + | alias:y + |<-Map 5 [CONTAINS] + Reduce Output Operator [RS_67] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_66] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_26] + outputColumnNames:["_col0","_col1"] + Map Join Operator [MAPJOIN_162] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Reducer 3":"_col0 (type: string)","Map 5":"_col3 (type: string)"} + | outputColumnNames:["_col2","_col3"] + |<-Reducer 3 [BROADCAST_EDGE] + | Reduce Output Operator [RS_22] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_9] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | |<-Union 2 [SIMPLE_EDGE] + | |<-Map 1 [CONTAINS] + | | Reduce Output Operator [RS_8] + | | key expressions:_col0 (type: string), _col1 (type: 
string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_7] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_144] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_0] + | | alias:x + | |<-Map 4 [CONTAINS] + | Reduce Output Operator [RS_8] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_7] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_145] + | predicate:value is not null (type: boolean) + | TableScan [TS_2] + | alias:y + |<-Map Join Operator [MAPJOIN_159] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 10":"_col0 (type: string)","Map 5":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3"] + |<-Map 10 [BROADCAST_EDGE] + | Reduce Output Operator [RS_18] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_14] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_147] + | predicate:(key is not null and value is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_13] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_12] + outputColumnNames:["_col0","_col1"] + Filter Operator [FIL_146] + predicate:key is not null (type: boolean) + TableScan [TS_11] + alias:y +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab_part +POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_part +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: query: 
insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab +POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab +PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@tab@ds=2008-04-08 +POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@tab@ds=2008-04-08 +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE tab2(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab2 +POSTHOOK: query: CREATE TABLE tab2(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab2 +PREHOOK: query: insert overwrite table tab2 partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@tab2@ds=2008-04-08 +POSTHOOK: query: insert overwrite table tab2 partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@tab2@ds=2008-04-08 +POSTHOOK: Lineage: tab2 PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab2 PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain +select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select s1.key as key, s1.value as value from tab 
s1 join tab s3 on s1.key=s3.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing statistics. Please check log for more details. + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 + File Output Operator [FS_8] + compressed:false + Statistics:Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_13] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"key (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + | + |<-Filter Operator [FIL_12] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_1] + | alias:s3 + | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + |<-Filter Operator [FIL_11] + predicate:key is not null (type: boolean) + Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:s1 + Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain +select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing statistics. Please check log for more details. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + compressed:false + Statistics:Num rows: 146 Data size: 1552 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_25] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"value (type: string)","0":"_col1 (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 146 Data size: 1552 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_9] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: int) + | Merge Join Operator [MERGEJOIN_23] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"key (type: int)","0":"key (type: int)"} + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + | | + | |<-Filter Operator [FIL_21] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_1] + | | alias:s3 + | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + | |<-Filter Operator [FIL_20] + | predicate:(key is not null and value is not null) (type: boolean) + | Statistics:Num rows: 61 Data size: 646 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:s1 + | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_11] + key expressions:value (type: string) + Map-reduce partition columns:value (type: string) + sort order:+ + Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_22] + predicate:value is not null (type: boolean) + Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:s2 + Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain +select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing statistics. Please check log for more details. 
+ +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 + File Output Operator [FS_8] + compressed:false + Statistics:Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_13] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"key (type: int)","0":"key (type: int)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + | + |<-Filter Operator [FIL_12] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_1] + | alias:s3 + | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + |<-Filter Operator [FIL_11] + predicate:key is not null (type: boolean) + Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:s1 + Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain +select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key join tab2 s2 on s1.value=s2.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key join tab2 s2 on s1.value=s2.value +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing statistics. Please check log for more details. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_14] + compressed:false + Statistics:Num rows: 146 Data size: 1552 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Merge Join Operator [MERGEJOIN_25] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"value (type: string)","0":"_col1 (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 146 Data size: 1552 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_9] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: int) + | Merge Join Operator [MERGEJOIN_23] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"key (type: int)","0":"key (type: int)"} + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + | | + | |<-Filter Operator [FIL_21] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_1] + | | alias:s3 + | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + | |<-Filter Operator [FIL_20] + | predicate:(key is not null and value is not null) (type: boolean) + | Statistics:Num rows: 61 Data size: 646 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:s1 + | Statistics:Num rows: 242 Data size: 2566 Basic stats: 
COMPLETE Column stats: NONE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_11] + key expressions:value (type: string) + Map-reduce partition columns:value (type: string) + sort order:+ + Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_22] + predicate:value is not null (type: boolean) + Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:s2 + Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: explain +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) a join tab_part b on (a.key = b.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) a join tab_part b on (a.key = b.key) +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing statistics. Please check log for more details. + +Vertex dependency in root stage +Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) +Map 1 <- Union 2 (CONTAINS) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Map 6 <- Union 2 (CONTAINS) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 + File Output Operator [FS_22] + compressed:false + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_20] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_19] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_18] + aggregations:["count()"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_35] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"key (type: int)","0":"_col0 (type: int)"} + | Statistics:Num rows: 279 Data size: 2963 Basic stats: COMPLETE Column stats: NONE + |<-Map 7 [SIMPLE_EDGE] + | Reduce Output Operator [RS_15] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_32] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_11] + | alias:b + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Union 2 [SIMPLE_EDGE] + |<-Map 1 [CONTAINS] + | Reduce Output Operator [RS_13] + | key expressions:_col0 (type: int) + | Map-reduce partition columns:_col0 (type: int) + | sort order:+ + | Merge Join Operator [MERGEJOIN_33] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"key (type: int)","0":"key (type: int)"} + | | outputColumnNames:["_col0"] + | | + | |<-Filter Operator [FIL_30] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE 
Column stats: NONE + | | TableScan [TS_1] + | | alias:s3 + | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + | |<-Filter Operator [FIL_29] + | predicate:key is not null (type: boolean) + | TableScan [TS_0] + | alias:s1 + |<-Map 6 [CONTAINS] + Reduce Output Operator [RS_13] + key expressions:_col0 (type: int) + Map-reduce partition columns:_col0 (type: int) + sort order:+ + Select Operator [SEL_9] + outputColumnNames:["_col0"] + Filter Operator [FIL_31] + predicate:key is not null (type: boolean) + TableScan [TS_8] + alias:s2 +PREHOOK: query: explain +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) a join tab_part b on (a.key = b.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) a join tab_part b on (a.key = b.key) +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing statistics. Please check log for more details. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 9 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) +Map 8 <- Union 3 (CONTAINS) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 5 + File Output Operator [FS_28] + compressed:false + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_26] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_25] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_24] + aggregations:["count()"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_47] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"key (type: int)","0":"_col0 (type: int)"} + | Statistics:Num rows: 293 Data size: 3118 Basic stats: COMPLETE Column stats: NONE + |<-Map 9 [SIMPLE_EDGE] + | Reduce Output Operator [RS_21] + | key expressions:key (type: int) + | Map-reduce partition columns:key (type: int) + | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_43] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_17] + | alias:b + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Union 3 [SIMPLE_EDGE] + |<-Reducer 2 [CONTAINS] + | Reduce Output Operator [RS_19] + | key expressions:_col0 (type: int) + | Map-reduce partition columns:_col0 (type: int) + | sort order:+ + | Merge Join Operator [MERGEJOIN_46] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"value (type: string)","0":"_col1 (type: string)"} + | | outputColumnNames:["_col0"] + | 
|<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_9] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: int) + | | Merge Join Operator [MERGEJOIN_44] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"1":"key (type: int)","0":"key (type: int)"} + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + | | | + | | |<-Filter Operator [FIL_40] + | | | predicate:key is not null (type: boolean) + | | | Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_1] + | | | alias:s3 + | | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + | | |<-Filter Operator [FIL_39] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 61 Data size: 646 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_0] + | | alias:s1 + | | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + | |<-Map 7 [SIMPLE_EDGE] + | Reduce Output Operator [RS_11] + | key expressions:value (type: string) + | Map-reduce partition columns:value (type: string) + | sort order:+ + | Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_41] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_2] + | alias:s2 + | Statistics:Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + |<-Map 8 [CONTAINS] + Reduce Output Operator [RS_19] + key expressions:_col0 (type: int) + Map-reduce partition columns:_col0 (type: int) + sort order:+ + Select Operator [SEL_15] + outputColumnNames:["_col0"] + Filter Operator [FIL_42] + predicate:key is not null (type: boolean) + TableScan [TS_14] + alias:s2 +PREHOOK: query: explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union all select * from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union all select * from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 12 <- Union 13 (CONTAINS) +Map 14 <- Union 13 (CONTAINS) +Map 21 <- Map 20 (BROADCAST_EDGE) +Map 1 <- Union 2 (CONTAINS) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 13 (SIMPLE_EDGE), Union 4 (CONTAINS) +Map 19 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) +Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Map 16 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 15 <- Union 13 (CONTAINS) +Map 18 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 17 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) +Reducer 3 <- Map 6 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Map 6 <- Map 7 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Union 4 + |<-Reducer 10 [CONTAINS] + | File Output Operator [FS_77] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_45] + | outputColumnNames:["_col0","_col1"] + | Merge Join Operator [MERGEJOIN_118] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} + | | outputColumnNames:["_col0","_col3"] + | |<-Reducer 9 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_41] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col3 (type: string) + | | Merge Join Operator [MERGEJOIN_115] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | | | outputColumnNames:["_col0","_col1","_col3"] + | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 11 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_38] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: string) + | | | Select Operator [SEL_25] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_104] + | | | predicate:key is not null (type: boolean) + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_24] + | | | alias:y + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 8 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_36] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_23] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_103] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_22] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | 
|<-Union 13 [SIMPLE_EDGE] + | |<-Map 12 [CONTAINS] + | | Reduce Output Operator [RS_43] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Select Operator [SEL_27] + | | outputColumnNames:["_col0"] + | | Filter Operator [FIL_105] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_26] + | | alias:x + | |<-Map 14 [CONTAINS] + | | Reduce Output Operator [RS_43] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Select Operator [SEL_29] + | | outputColumnNames:["_col0"] + | | Filter Operator [FIL_106] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_28] + | | alias:y + | |<-Map 15 [CONTAINS] + | Reduce Output Operator [RS_43] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Select Operator [SEL_33] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_107] + | predicate:value is not null (type: boolean) + | TableScan [TS_32] + | alias:y + |<-Map 19 [CONTAINS] + | File Output Operator [FS_77] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_75] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_119] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 21":"_col1 (type: string)","Map 19":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col4"] + | |<-Map 21 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_73] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col3 (type: string) + | | Map Join Operator [MAPJOIN_116] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 21":"_col0 (type: string)","Map 20":"_col0 (type: string)"} + | | | outputColumnNames:["_col0","_col1","_col3"] + | | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 20 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_65] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: string) + | | | Select Operator [SEL_61] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_112] + | | | predicate:(key is not null and value is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_60] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_63] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_113] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_62] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE + | | Reduce Output Operator [RS_122] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col3 (type: string) + | | Please refer to the previous Map Join Operator [MAPJOIN_116] + | | Reduce Output Operator [RS_123] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col3 (type: string) + | | Please refer to the previous Map Join Operator [MAPJOIN_116] + | | Reduce Output Operator [RS_124] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col3 (type: string) + | | Please refer to the previous Map Join Operator [MAPJOIN_116] + | |<-Select Operator [SEL_58] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_111] + | predicate:value is not null (type: boolean) + | TableScan [TS_57] + | alias:y + |<-Map 16 [CONTAINS] + | File Output Operator [FS_77] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_75] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_119] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 21":"_col1 (type: string)","Map 16":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col4"] + | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] + | |<-Select Operator [SEL_49] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_108] + | predicate:value is not null (type: boolean) + | TableScan [TS_48] + | alias:x + |<-Map 18 [CONTAINS] + | File Output Operator [FS_77] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_75] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_119] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 21":"_col1 (type: string)","Map 18":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col4"] + | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] + | |<-Select Operator [SEL_55] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_110] + | predicate:value is not null (type: boolean) + | TableScan [TS_54] + | alias:y + |<-Map 17 [CONTAINS] + | File Output Operator [FS_77] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_75] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_119] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 21":"_col1 (type: string)","Map 17":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col4"] + | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] + | |<-Select 
Operator [SEL_51] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_109] + | predicate:value is not null (type: boolean) + | TableScan [TS_50] + | alias:y + |<-Reducer 3 [CONTAINS] + File Output Operator [FS_77] + compressed:false + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_21] + outputColumnNames:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_117] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col2","_col3"] + |<-Map 6 [SIMPLE_EDGE] + | Reduce Output Operator [RS_19] + | key expressions:_col3 (type: string) + | Map-reduce partition columns:_col3 (type: string) + | sort order:+ + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string), _col2 (type: string) + | Map Join Operator [MAPJOIN_114] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 7":"_col0 (type: string)","Map 6":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3"] + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | |<-Map 7 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_13] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_9] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_102] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_8] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_7] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_101] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_6] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Union 2 [SIMPLE_EDGE] + |<-Map 1 [CONTAINS] + | Reduce Output Operator [RS_17] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Select Operator [SEL_1] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_99] + | predicate:value is not null (type: boolean) + | TableScan [TS_0] + | alias:x + |<-Map 5 [CONTAINS] + Reduce Output Operator [RS_17] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Select Operator [SEL_3] + outputColumnNames:["_col0"] + Filter Operator [FIL_100] + predicate:value is not null (type: boolean) + TableScan [TS_2] + alias:y +PREHOOK: query: explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value 
from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value) +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Map 30 <- Union 24 (CONTAINS) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 23 <- Union 24 (CONTAINS) +Map 32 <- Union 28 (CONTAINS) +Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) +Map 22 <- Union 19 (CONTAINS) +Map 31 <- Union 26 (CONTAINS) +Map 21 <- Union 17 (CONTAINS) +Map 34 <- Map 33 (BROADCAST_EDGE) +Map 1 <- Union 2 (CONTAINS) +Reducer 20 <- Union 19 (SIMPLE_EDGE) +Map 10 <- Map 11 (BROADCAST_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Map 16 <- Union 17 (CONTAINS) +Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) +Reducer 18 <- Union 17 (SIMPLE_EDGE), Union 19 (CONTAINS) +Reducer 29 <- Map 34 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Map 9 <- Union 2 (CONTAINS) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 8 + File Output Operator [FS_121] + compressed:false + Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_119] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE + |<-Union 7 [SIMPLE_EDGE] + |<-Reducer 6 [CONTAINS] + | Reduce Output Operator [RS_118] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_117] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Group By Operator [GBY_67] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | |<-Union 5 [SIMPLE_EDGE] + | |<-Reducer 14 [CONTAINS] + | | Reduce Output Operator [RS_66] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_65] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_61] + | | outputColumnNames:["_col0","_col1"] + | 
| Merge Join Operator [MERGEJOIN_162] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} + | | | outputColumnNames:["_col0","_col3"] + | | |<-Reducer 13 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_57] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string), _col3 (type: string) + | | | Merge Join Operator [MERGEJOIN_159] + | | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | | | | outputColumnNames:["_col0","_col1","_col3"] + | | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | | |<-Map 12 [SIMPLE_EDGE] + | | | | Reduce Output Operator [RS_52] + | | | | key expressions:_col0 (type: string) + | | | | Map-reduce partition columns:_col0 (type: string) + | | | | sort order:+ + | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | | value expressions:_col1 (type: string) + | | | | Select Operator [SEL_28] + | | | | outputColumnNames:["_col0","_col1"] + | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | | Filter Operator [FIL_147] + | | | | predicate:(key is not null and value is not null) (type: boolean) + | | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | | TableScan [TS_27] + | | | | alias:y + | | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | | |<-Map 15 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_54] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: string) + | | | Select Operator [SEL_30] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_148] + | | | predicate:key is not null (type: boolean) + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_29] + | | | alias:y + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 20 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_59] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_50] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_49] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | |<-Union 19 [SIMPLE_EDGE] + | | |<-Map 22 [CONTAINS] + | | | Reduce Output Operator [RS_48] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_47] + | | | keys:_col0 (type: string), _col1 (type: string) + | 
| | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_43] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_151] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_42] + | | | alias:y + | | |<-Reducer 18 [CONTAINS] + | | Reduce Output Operator [RS_48] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_47] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Group By Operator [GBY_40] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | |<-Union 17 [SIMPLE_EDGE] + | | |<-Map 21 [CONTAINS] + | | | Reduce Output Operator [RS_39] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_38] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_34] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_150] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_33] + | | | alias:y + | | |<-Map 16 [CONTAINS] + | | Reduce Output Operator [RS_39] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_38] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_32] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_149] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_31] + | | alias:x + | |<-Reducer 4 [CONTAINS] + | Reduce Output Operator [RS_66] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_65] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_26] + | outputColumnNames:["_col0","_col1"] + | Merge Join Operator [MERGEJOIN_161] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col2","_col3"] + | |<-Map 10 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_24] + | | key expressions:_col3 (type: string) + | | Map-reduce partition columns:_col3 (type: string) + | | sort order:+ + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string) + | | Map Join Operator [MAPJOIN_158] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 11":"_col0 (type: string)","Map 10":"_col0 (type: string)"} + | | | outputColumnNames:["_col1","_col2","_col3"] + | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 11 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_18] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: string) + | | | Select Operator [SEL_14] + | | | 
outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_146] + | | | predicate:(key is not null and value is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_13] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_12] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_145] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_11] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_22] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_9] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | |<-Union 2 [SIMPLE_EDGE] + | |<-Map 1 [CONTAINS] + | | Reduce Output Operator [RS_8] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_7] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_143] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_0] + | | alias:x + | |<-Map 9 [CONTAINS] + | Reduce Output Operator [RS_8] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_7] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_3] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_144] + | predicate:value is not null (type: boolean) + | TableScan [TS_2] + | alias:y + |<-Reducer 29 [CONTAINS] + Reduce Output Operator [RS_118] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_117] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_113] + outputColumnNames:["_col0","_col1"] + Map Join Operator [MAPJOIN_163] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 34":"_col1 (type: string)","Reducer 29":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col4"] + |<-Map 34 [BROADCAST_EDGE] + | Reduce Output Operator [RS_111] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col3 (type: string) + | Map 
Join Operator [MAPJOIN_160] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 34":"_col0 (type: string)","Map 33":"_col0 (type: string)"} + | | outputColumnNames:["_col0","_col1","_col3"] + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | |<-Map 33 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_103] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_99] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_156] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_98] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_101] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_157] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_100] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_97] + outputColumnNames:["_col0"] + Group By Operator [GBY_96] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + |<-Union 28 [SIMPLE_EDGE] + |<-Map 32 [CONTAINS] + | Reduce Output Operator [RS_95] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_94] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_90] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_155] + | predicate:value is not null (type: boolean) + | TableScan [TS_89] + | alias:y + |<-Reducer 27 [CONTAINS] + Reduce Output Operator [RS_95] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_94] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Group By Operator [GBY_87] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + |<-Union 26 [SIMPLE_EDGE] + |<-Reducer 25 [CONTAINS] + | Reduce Output Operator [RS_86] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_85] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Group By Operator [GBY_78] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | |<-Union 24 [SIMPLE_EDGE] + | |<-Map 30 [CONTAINS] + | | Reduce Output Operator [RS_77] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_76] + | | keys:_col0 (type: string), _col1 (type: string) + | | 
outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_72] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_153] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_71] + | | alias:y + | |<-Map 23 [CONTAINS] + | Reduce Output Operator [RS_77] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_76] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_70] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_152] + | predicate:value is not null (type: boolean) + | TableScan [TS_69] + | alias:x + |<-Map 31 [CONTAINS] + Reduce Output Operator [RS_86] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_85] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_81] + outputColumnNames:["_col0","_col1"] + Filter Operator [FIL_154] + predicate:value is not null (type: boolean) + TableScan [TS_80] + alias:y +PREHOOK: query: CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: CREATE TABLE b(key STRING, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: CREATE TABLE b(key STRING, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: CREATE TABLE c(key STRING, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@c +POSTHOOK: query: CREATE TABLE c(key STRING, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@c +PREHOOK: query: explain +from +( +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union all select * from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +) tmp +INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +from +( +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union all select * from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) 
+JOIN (select key, value from src1 union all select key, value from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +) tmp +INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Map 12 <- Union 9 (CONTAINS) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Map 11 <- Union 9 (CONTAINS) +Map 21 <- Map 20 (BROADCAST_EDGE) +Map 1 <- Union 2 (CONTAINS) +Reducer 10 <- Reducer 14 (SIMPLE_EDGE), Union 4 (CONTAINS), Union 9 (SIMPLE_EDGE) +Map 19 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 16 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 18 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 17 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) +Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Map 7 <- Map 6 (BROADCAST_EDGE) +Map 8 <- Union 9 (CONTAINS) + +Stage-7 + Stats-Aggr Operator + Stage-2 + Move Operator + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Stage-4 + Dependency Collection{} + Stage-3 + Union 4 + |<-Reducer 10 [CONTAINS] + | File Output Operator [FS_62] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_37] + | outputColumnNames:["_col0","_col1"] + | Merge Join Operator [MERGEJOIN_107] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col1 (type: string)","0":"_col1 (type: string)"} + | | outputColumnNames:["_col0","_col6"] + | |<-Reducer 14 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_33] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col6 (type: string) + | | Merge Join Operator [MERGEJOIN_104] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"1":"key (type: string)","0":"key (type: string)"} + | | | outputColumnNames:["_col0","_col1","_col6"] + | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 13 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_28] + | | | key expressions:key (type: string) + | | | Map-reduce partition columns:key (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:value (type: string) + | | | Filter Operator [FIL_95] + | | | predicate:(key is not null and value is not null) (type: boolean) + | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_25] + | | | alias:x + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 15 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_30] + | | key expressions:key (type: string) + | | Map-reduce partition columns:key (type: string) + | | sort order:+ + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: 
NONE + | | value expressions:value (type: string) + | | Filter Operator [FIL_96] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_26] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Union 9 [SIMPLE_EDGE] + | |<-Map 12 [CONTAINS] + | | Reduce Output Operator [RS_35] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Select Operator [SEL_24] + | | outputColumnNames:["_col1"] + | | Filter Operator [FIL_94] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_23] + | | alias:src + | |<-Map 11 [CONTAINS] + | | Reduce Output Operator [RS_35] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Select Operator [SEL_21] + | | outputColumnNames:["_col1"] + | | Filter Operator [FIL_93] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_20] + | | alias:src + | |<-Map 8 [CONTAINS] + | Reduce Output Operator [RS_35] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Select Operator [SEL_19] + | outputColumnNames:["_col1"] + | Filter Operator [FIL_92] + | predicate:value is not null (type: boolean) + | TableScan [TS_18] + | alias:src1 + | File Output Operator [FS_64] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Please refer to the previous Select Operator [SEL_37] + | File Output Operator [FS_66] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Please refer to the previous Select Operator [SEL_37] + |<-Map 19 [CONTAINS] + | File Output Operator [FS_62] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_60] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_108] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 21":"_col1 (type: string)","Map 19":"_col1 (type: string)"} + | | outputColumnNames:["_col0","_col6"] + | |<-Map 21 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_56] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col6 (type: string) + | | Map Join Operator [MAPJOIN_105] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 21":"key (type: string)","Map 20":"key (type: string)"} + | | | outputColumnNames:["_col0","_col1","_col6"] + | | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 20 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_51] + | | | key expressions:key (type: string) + | | | Map-reduce partition columns:key (type: string) + | | | sort order:+ + | | 
| Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:value (type: string) + | | | Filter Operator [FIL_101] + | | | predicate:(key is not null and value is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_48] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Filter Operator [FIL_102] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_49] + | | alias:y + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | Reduce Output Operator [RS_111] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col6 (type: string) + | | Please refer to the previous Map Join Operator [MAPJOIN_105] + | | Reduce Output Operator [RS_112] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col6 (type: string) + | | Please refer to the previous Map Join Operator [MAPJOIN_105] + | | Reduce Output Operator [RS_113] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col6 (type: string) + | | Please refer to the previous Map Join Operator [MAPJOIN_105] + | |<-Select Operator [SEL_47] + | outputColumnNames:["_col1"] + | Filter Operator [FIL_100] + | predicate:value is not null (type: boolean) + | TableScan [TS_46] + | alias:src + | File Output Operator [FS_64] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Please refer to the previous Select Operator [SEL_60] + | File Output Operator [FS_66] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Please refer to the previous Select Operator [SEL_60] + |<-Map 16 [CONTAINS] + | File Output Operator [FS_62] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_60] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_108] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 21":"_col1 (type: string)","Map 16":"_col1 (type: string)"} + | | outputColumnNames:["_col0","_col6"] + | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] + | |<-Select Operator [SEL_40] + | outputColumnNames:["_col1"] + | Filter Operator [FIL_97] + | predicate:value is not null (type: boolean) + | TableScan [TS_39] + | alias:src1 + 
| File Output Operator [FS_64] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Please refer to the previous Select Operator [SEL_60] + | File Output Operator [FS_66] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Please refer to the previous Select Operator [SEL_60] + |<-Map 18 [CONTAINS] + | File Output Operator [FS_62] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_60] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_108] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 21":"_col1 (type: string)","Map 18":"_col1 (type: string)"} + | | outputColumnNames:["_col0","_col6"] + | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] + | |<-Select Operator [SEL_45] + | outputColumnNames:["_col1"] + | Filter Operator [FIL_99] + | predicate:value is not null (type: boolean) + | TableScan [TS_44] + | alias:src + | File Output Operator [FS_64] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Please refer to the previous Select Operator [SEL_60] + | File Output Operator [FS_66] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Please refer to the previous Select Operator [SEL_60] + |<-Map 17 [CONTAINS] + | File Output Operator [FS_62] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_60] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_108] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 21":"_col1 (type: string)","Map 17":"_col1 (type: string)"} + | | outputColumnNames:["_col0","_col6"] + | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] + | |<-Select Operator [SEL_42] + | outputColumnNames:["_col1"] + | Filter Operator [FIL_98] + | predicate:value is not null (type: boolean) + | TableScan [TS_41] + | alias:src + | File Output Operator [FS_64] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Please refer to the previous Select Operator [SEL_60] + | File Output Operator [FS_66] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input 
format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Please refer to the previous Select Operator [SEL_60] + |<-Reducer 3 [CONTAINS] + File Output Operator [FS_62] + compressed:false + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_17] + outputColumnNames:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_106] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col1 (type: string)","0":"_col1 (type: string)"} + | outputColumnNames:["_col0","_col6"] + |<-Map 7 [SIMPLE_EDGE] + | Reduce Output Operator [RS_13] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col6 (type: string) + | Map Join Operator [MAPJOIN_103] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 7":"key (type: string)","Map 6":"key (type: string)"} + | | outputColumnNames:["_col0","_col1","_col6"] + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | |<-Map 6 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_8] + | | key expressions:key (type: string) + | | Map-reduce partition columns:key (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:value (type: string) + | | Filter Operator [FIL_90] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_5] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Filter Operator [FIL_91] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_6] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Union 2 [SIMPLE_EDGE] + |<-Map 1 [CONTAINS] + | Reduce Output Operator [RS_15] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Select Operator [SEL_1] + | outputColumnNames:["_col1"] + | Filter Operator [FIL_88] + | predicate:value is not null (type: boolean) + | TableScan [TS_0] + | alias:src1 + |<-Map 5 [CONTAINS] + Reduce Output Operator [RS_15] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) + sort order:+ + Select Operator [SEL_3] + outputColumnNames:["_col1"] + Filter Operator [FIL_89] + predicate:value is not null (type: boolean) + TableScan [TS_2] + alias:src + File Output Operator [FS_64] + compressed:false + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Please refer to the previous Select Operator [SEL_17] + File Output Operator [FS_66] + compressed:false + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Please refer to the previous Select Operator [SEL_17] +Stage-6 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Please refer to the previous Stage-4 +Stage-5 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Please refer to the previous Stage-4 +PREHOOK: query: explain +FROM +( +SELECT x.key as key, y.value as value from src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key as key, y.value as value from src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key as key, y.value as value from src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value) +) tmp +INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +( +SELECT x.key as key, y.value as value from src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key as key, y.value as value from src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key as key, y.value as value from src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value) +) tmp +INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value +POSTHOOK: type: QUERY +Plan not optimized by CBO. 
+ +Vertex dependency in root stage +Map 12 <- Union 13 (CONTAINS) +Map 30 <- Union 24 (CONTAINS) +Map 11 <- Map 10 (BROADCAST_EDGE) +Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS) +Map 23 <- Union 24 (CONTAINS) +Map 32 <- Union 28 (CONTAINS) +Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) +Map 31 <- Union 26 (CONTAINS) +Map 34 <- Map 33 (BROADCAST_EDGE) +Map 1 <- Union 2 (CONTAINS) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 19 <- Union 15 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) +Map 18 <- Union 13 (CONTAINS) +Reducer 29 <- Map 34 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 16 <- Union 15 (SIMPLE_EDGE) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Map 9 <- Union 2 (CONTAINS) + +Stage-7 + Stats-Aggr Operator + Stage-2 + Move Operator + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Stage-4 + Dependency Collection{} + Stage-3 + Reducer 8 + File Output Operator [FS_114] + compressed:false + Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_111] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE + |<-Union 7 [SIMPLE_EDGE] + |<-Reducer 6 [CONTAINS] + | Reduce Output Operator [RS_110] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_109] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Group By Operator [GBY_62] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | |<-Union 5 [SIMPLE_EDGE] + | |<-Reducer 4 [CONTAINS] + | | Reduce Output Operator [RS_61] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_60] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_23] + | | outputColumnNames:["_col0","_col1"] + | | Merge Join Operator [MERGEJOIN_158] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"1":"_col1 (type: string)","0":"_col1 (type: string)"} + | | | outputColumnNames:["_col0","_col6"] + | | |<-Map 11 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_19] + | | | key expressions:_col1 (type: string) + | | | Map-reduce partition columns:_col1 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col0 (type: string), _col6 (type: string) + | | | Map Join Operator [MAPJOIN_155] + | | | | condition map:[{"":"Inner 
Join 0 to 1"}] + | | | | keys:{"Map 11":"key (type: string)","Map 10":"key (type: string)"} + | | | | outputColumnNames:["_col0","_col1","_col6"] + | | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | | |<-Map 10 [BROADCAST_EDGE] + | | | | Reduce Output Operator [RS_14] + | | | | key expressions:key (type: string) + | | | | Map-reduce partition columns:key (type: string) + | | | | sort order:+ + | | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | | value expressions:value (type: string) + | | | | Filter Operator [FIL_142] + | | | | predicate:(key is not null and value is not null) (type: boolean) + | | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | | TableScan [TS_11] + | | | | alias:x + | | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | | |<-Filter Operator [FIL_143] + | | | predicate:key is not null (type: boolean) + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_12] + | | | alias:y + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 3 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_21] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_10] + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_9] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | | |<-Union 2 [SIMPLE_EDGE] + | | |<-Map 1 [CONTAINS] + | | | Reduce Output Operator [RS_8] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_7] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_1] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_140] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_0] + | | | alias:src1 + | | |<-Map 9 [CONTAINS] + | | Reduce Output Operator [RS_8] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_7] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_3] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_141] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_2] + | | alias:src + | |<-Reducer 17 [CONTAINS] + | Reduce Output Operator [RS_61] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_60] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_56] + | outputColumnNames:["_col0","_col1"] + | Merge Join Operator [MERGEJOIN_159] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col1 
(type: string)","0":"_col1 (type: string)"} + | | outputColumnNames:["_col0","_col6"] + | |<-Reducer 16 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_54] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_43] + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_42] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | | |<-Union 15 [SIMPLE_EDGE] + | | |<-Reducer 14 [CONTAINS] + | | | Reduce Output Operator [RS_41] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_40] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Group By Operator [GBY_33] + | | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | |<-Union 13 [SIMPLE_EDGE] + | | | |<-Map 12 [CONTAINS] + | | | | Reduce Output Operator [RS_32] + | | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | | sort order:++ + | | | | Group By Operator [GBY_31] + | | | | keys:_col0 (type: string), _col1 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | | Select Operator [SEL_25] + | | | | outputColumnNames:["_col0","_col1"] + | | | | Filter Operator [FIL_144] + | | | | predicate:value is not null (type: boolean) + | | | | TableScan [TS_24] + | | | | alias:src1 + | | | |<-Map 18 [CONTAINS] + | | | Reduce Output Operator [RS_32] + | | | key expressions:_col0 (type: string), _col1 (type: string) + | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | | sort order:++ + | | | Group By Operator [GBY_31] + | | | keys:_col0 (type: string), _col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Select Operator [SEL_27] + | | | outputColumnNames:["_col0","_col1"] + | | | Filter Operator [FIL_145] + | | | predicate:value is not null (type: boolean) + | | | TableScan [TS_26] + | | | alias:src + | | |<-Map 19 [CONTAINS] + | | Reduce Output Operator [RS_41] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_40] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_36] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_146] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_35] + | | alias:src + | |<-Reducer 21 [SIMPLE_EDGE] + | Reduce Output Operator [RS_52] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col6 (type: string) + | Merge Join Operator [MERGEJOIN_156] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"key (type: string)","0":"key (type: string)"} + | | 
outputColumnNames:["_col0","_col1","_col6"] + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | |<-Map 20 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_47] + | | key expressions:key (type: string) + | | Map-reduce partition columns:key (type: string) + | | sort order:+ + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | value expressions:value (type: string) + | | Filter Operator [FIL_147] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_44] + | | alias:x + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map 22 [SIMPLE_EDGE] + | Reduce Output Operator [RS_49] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: string) + | Filter Operator [FIL_148] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_45] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 29 [CONTAINS] + Reduce Output Operator [RS_110] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_109] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_105] + outputColumnNames:["_col0","_col1"] + Map Join Operator [MAPJOIN_160] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 34":"_col1 (type: string)","Reducer 29":"_col1 (type: string)"} + | outputColumnNames:["_col0","_col6"] + |<-Map 34 [BROADCAST_EDGE] + | Reduce Output Operator [RS_101] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col6 (type: string) + | Map Join Operator [MAPJOIN_157] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 34":"key (type: string)","Map 33":"key (type: string)"} + | | outputColumnNames:["_col0","_col1","_col6"] + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | |<-Map 33 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_96] + | | key expressions:key (type: string) + | | Map-reduce partition columns:key (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:value (type: string) + | | Filter Operator [FIL_153] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_93] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Filter Operator [FIL_154] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_94] + | alias:y + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_92] + outputColumnNames:["_col1"] + Group By Operator [GBY_91] + | 
keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + |<-Union 28 [SIMPLE_EDGE] + |<-Map 32 [CONTAINS] + | Reduce Output Operator [RS_90] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_89] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_85] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_152] + | predicate:value is not null (type: boolean) + | TableScan [TS_84] + | alias:src + |<-Reducer 27 [CONTAINS] + Reduce Output Operator [RS_90] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_89] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Group By Operator [GBY_82] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + |<-Union 26 [SIMPLE_EDGE] + |<-Reducer 25 [CONTAINS] + | Reduce Output Operator [RS_81] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_80] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Group By Operator [GBY_73] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | |<-Union 24 [SIMPLE_EDGE] + | |<-Map 30 [CONTAINS] + | | Reduce Output Operator [RS_72] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_71] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_67] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_150] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_66] + | | alias:src + | |<-Map 23 [CONTAINS] + | Reduce Output Operator [RS_72] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_71] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_65] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_149] + | predicate:value is not null (type: boolean) + | TableScan [TS_64] + | alias:src1 + |<-Map 31 [CONTAINS] + Reduce Output Operator [RS_81] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_80] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_76] + outputColumnNames:["_col0","_col1"] + Filter Operator [FIL_151] + predicate:value is not null (type: boolean) + TableScan [TS_75] + alias:src + File Output Operator [FS_116] + compressed:false + Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Please refer to the previous Group By Operator [GBY_111] + File Output Operator [FS_118] + compressed:false + Statistics:Num rows: 272 Data size: 2889 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Please refer to the previous Group By Operator [GBY_111] +Stage-6 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Please refer to the previous Stage-4 +Stage-5 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Please refer to the previous Stage-4 +PREHOOK: query: CREATE TABLE DEST1(key STRING, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1 +POSTHOOK: query: CREATE TABLE DEST1(key STRING, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2 +POSTHOOK: query: CREATE TABLE DEST2(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2 +PREHOOK: query: explain +FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION DISTINCT + select s2.key as key, s2.value as value from src s2) unionsrc +INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key +INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION DISTINCT + select s2.key as key, s2.value as value from src s2) unionsrc +INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key +INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value +POSTHOOK: type: QUERY +Plan not optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Union 3 (SIMPLE_EDGE) +Map 6 <- Union 3 (CONTAINS) + +Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest2","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 5 + File Output Operator [FS_20] + compressed:false + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest1","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Group By Operator [GBY_18] + | aggregations:["count(DISTINCT KEY._col1:0._col0)"] + | keys:KEY._col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_17] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:++ + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Group By Operator [GBY_16] + aggregations:["count(DISTINCT substr(_col1, 5))"] + keys:_col0 (type: string), substr(_col1, 5) (type: string) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Group By Operator [GBY_13] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + |<-Union 3 [SIMPLE_EDGE] + |<-Reducer 2 [CONTAINS] + | Reduce Output Operator [RS_12] + | key expressions:_col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:+++ + | Group By Operator [GBY_11] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_5] + | outputColumnNames:["_col0","_col1"] + | Group By Operator [GBY_4] + | | aggregations:["count(VALUE._col0)"] + | | outputColumnNames:["_col0"] + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_3] + | sort order: + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: bigint) + | Group By Operator [GBY_2] + | aggregations:["count(1)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_1] + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:s1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 6 [CONTAINS] + Reduce Output Operator [RS_12] + key expressions:_col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:+++ + Group By Operator [GBY_11] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1"] + TableScan [TS_6] + alias:s2 + File Output 
Operator [FS_26] + compressed:false + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest2","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_25] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Group By Operator [GBY_24] + aggregations:["count(DISTINCT substr(_col1, 5))"] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Please refer to the previous Group By Operator [GBY_13] +Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.dest1","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Please refer to the previous Stage-3 +PREHOOK: query: EXPLAIN FROM UNIQUEJOIN PRESERVE src a (a.key), PRESERVE src1 b (b.key), PRESERVE srcpart c (c.key) SELECT a.key, b.key, c.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM UNIQUEJOIN PRESERVE src a (a.key), PRESERVE src1 b (b.key), PRESERVE srcpart c (c.key) SELECT a.key, b.key, c.key +POSTHOOK: type: QUERY +Plan not optimized by CBO due to missing feature [Unique_join]. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_8] + compressed:false + Statistics:Num rows: 4400 Data size: 46745 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 4400 Data size: 46745 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_9] + | condition map:[{"":"Unique Join0 to 0"},{"":"Unique Join0 to 0"},{"":"Unique Join0 to 0"}] + | keys:{"2":"key (type: string)","1":"key (type: string)","0":"key (type: string)"} + | outputColumnNames:["_col0","_col5","_col10"] + | Statistics:Num rows: 4400 Data size: 46745 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_3] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_4] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_1] + | alias:b + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:key (type: string) + Map-reduce partition columns:key (type: string) + sort order:+ + Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + 
TableScan [TS_2] + alias:c + Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE +PREHOOK: query: EXPLAIN +SELECT +TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) +FROM src a join src b +on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT +TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) +FROM src a join src b +on a.key = b.key +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_9] + compressed:false + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Transform Operator [SCR_8] + command:cat + output info:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_14] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"key (type: string)","0":"key (type: string)"} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_3] + | key expressions:key (type: string) + | Map-reduce partition columns:key (type: string) + | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:value (type: string) + | Filter Operator [FIL_12] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:a + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_5] + key expressions:key (type: string) + Map-reduce partition columns:key (type: string) + sort order:+ + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_13] + predicate:key is not null (type: boolean) + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_1] + alias:b + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/tez/subquery_in.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/subquery_in.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/subquery_in.q.out (working copy) @@ -293,15 +293,11 @@ TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: 
string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -323,24 +319,24 @@ Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: int + output shape: _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 - partition by: _col0 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col1 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) @@ -350,7 +346,7 @@ predicate: (_wcol0 <= 2) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int) + expressions: _col5 (type: int) outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -465,15 +461,11 @@ TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -499,34 +491,34 @@ Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2, _col5 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: int + output shape: _col2: string, _col5: int type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 - partition by: _col0 + order by: _col5 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col1 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_wcol0 <= 2) and _col0 is not null) (type: boolean) + predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: int) + expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Group By Operator Index: ql/src/test/results/clientpositive/tez/tez_join.q.out 
=================================================================== --- ql/src/test/results/clientpositive/tez/tez_join.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/tez_join.q.out (working copy) @@ -48,9 +48,7 @@ Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -69,7 +67,7 @@ key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 4 + Map 3 Map Operator Tree: TableScan alias: t2 @@ -91,46 +89,33 @@ expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 3 Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: (_col0 = _col1) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (_col0 = _col1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -160,161 +145,3 @@ POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain -select vt1.id from -(select rt1.id from -(select t1.id, t1.od, count(*) from t1 group by t1.id, t1.od) rt1) vt1 -join -(select rt2.id from -(select t2.id, t2.od, count(*) from t2 group by t2.id, t2.od) rt2) vt2 -where 
vt1.id=vt2.id -PREHOOK: type: QUERY -POSTHOOK: query: explain -select vt1.id from -(select rt1.id from -(select t1.id, t1.od, count(*) from t1 group by t1.id, t1.od) rt1) vt1 -join -(select rt2.id from -(select t2.id, t2.od, count(*) from t2 group by t2.id, t2.od) rt2) vt2 -where vt1.id=vt2.id -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: id is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: id (type: string), od (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col2 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: id is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: id (type: string), od (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col2 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: (_col0 = _col1) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input 
format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select vt1.id from -(select rt1.id from -(select t1.id, t1.od, count(*) from t1 group by t1.id, t1.od) rt1) vt1 -join -(select rt2.id from -(select t2.id, t2.od, count(*) from t2 group by t2.id, t2.od) rt2) vt2 -where vt1.id=vt2.id -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select vt1.id from -(select rt1.id from -(select t1.id, t1.od, count(*) from t1 group by t1.id, t1.od) rt1) vt1 -join -(select rt2.id from -(select t2.id, t2.od, count(*) from t2 group by t2.id, t2.od) rt2) vt2 -where vt1.id=vt2.id -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### Index: ql/src/test/results/clientpositive/tez/tez_smb_1.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/tez_smb_1.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/tez_smb_1.q.out (working copy) @@ -177,3 +177,146 @@ Processor Tree: ListSink +PREHOOK: query: explain +select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + 
alias: t2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = _col1) (type: boolean) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Input: default@tab_part +PREHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Input: default@tab_part +POSTHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +480 Index: ql/src/test/results/clientpositive/tez/tez_smb_main.q.out 
=================================================================== --- ql/src/test/results/clientpositive/tez/tez_smb_main.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/tez_smb_main.q.out (working copy) @@ -1194,3 +1194,155 @@ Processor Tree: ListSink +PREHOOK: query: explain +select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = _col1) (type: boolean) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Input: default@tab_part +PREHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +(select rt1.id from +(select t1.key as id, t1.value as od from tab t1 order by id, od) rt1) vt1 +join +(select rt2.id from +(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 +where vt1.id=vt2.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Input: default@tab_part +POSTHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +480 Index: ql/src/test/results/clientpositive/tez/update_all_types.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/update_all_types.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/update_all_types.q.out (working copy) @@ -1,6 +1,7 @@ PREHOOK: query: create table acid_uat(ti tinyint, si smallint, i int, + j int, bi bigint, f float, d double, @@ -17,6 +18,7 @@ POSTHOOK: query: create table acid_uat(ti tinyint, si smallint, i int, + j int, bi bigint, f float, d double, @@ -34,6 +36,7 @@ select ctinyint, csmallint, cint, + cint j, cbigint, cfloat, cdouble, @@ -52,6 +55,7 @@ select ctinyint, csmallint, cint, + cint j, cbigint, cfloat, cdouble, @@ -74,6 +78,7 @@ POSTHOOK: Lineage: acid_uat.dt EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: acid_uat.f SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] POSTHOOK: Lineage: acid_uat.i SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: acid_uat.j SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: acid_uat.s SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] POSTHOOK: Lineage: acid_uat.si SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] POSTHOOK: Lineage: acid_uat.t SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] @@ -87,20 +92,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_uat #### A masked pattern was here #### -11 NULL -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true -NULL -7382 -1073051226 -1887561756 NULL -7382.0 NULL NULL 1970-01-01 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false -11 NULL -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false -NULL 8373 -1072081801 1864027286 NULL 8373.0 NULL NULL 1970-01-01 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true -NULL -5470 -1072076362 1864027286 NULL -5470.0 NULL NULL 1970-01-01 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true --51 NULL -1071480828 -1401575336 -51.0 NULL -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true -8 NULL -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true -NULL -741 -1070883071 -1645852809 NULL -741.0 NULL NULL 1970-01-01 0ruyd6Y50JpdGRf6HqD 0ruyd6Y50JpdGRf6HqD xH7445Rals48VOulSyR5F false -NULL -947 -1070551679 1864027286 NULL -947.0 NULL NULL 1970-01-01 iUR3Q iUR3Q 4KWs6gw7lv2WYd66P false -11 NULL -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true +11 NULL -1073279343 -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true +NULL -7382 -1073051226 -1073051226 -1887561756 NULL -7382.0 NULL NULL 1969-12-31 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false +11 NULL -1072910839 -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false +NULL 8373 -1072081801 -1072081801 1864027286 NULL 8373.0 NULL NULL 1969-12-31 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true +NULL -5470 -1072076362 -1072076362 1864027286 NULL -5470.0 NULL NULL 1969-12-31 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true +-51 NULL -1071480828 -1071480828 -1401575336 -51.0 NULL -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true +8 NULL -1071363017 -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true +NULL -741 -1070883071 -1070883071 -1645852809 NULL -741.0 NULL NULL 1969-12-31 0ruyd6Y50JpdGRf6HqD 0ruyd6Y50JpdGRf6HqD xH7445Rals48VOulSyR5F false +NULL -947 -1070551679 -1070551679 1864027286 NULL -947.0 NULL NULL 1969-12-31 iUR3Q iUR3Q 4KWs6gw7lv2WYd66P false +11 NULL -1069736047 -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true PREHOOK: query: update acid_uat set ti = 1, si = 2, - i = 3, + j = 3, bi = 4, f = 3.14, d = 6.28, @@ -118,7 +123,7 @@ POSTHOOK: query: update acid_uat set ti = 1, si = 2, - i = 3, + j = 3, bi = 4, f = 3.14, d = 6.28, @@ -141,16 +146,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_uat #### A masked pattern was here #### -11 NULL -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true -NULL -7382 -1073051226 -1887561756 NULL -7382.0 NULL NULL 1970-01-01 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false -11 NULL -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false -NULL 8373 -1072081801 1864027286 NULL 8373.0 NULL NULL 
1970-01-01 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true -NULL -5470 -1072076362 1864027286 NULL -5470.0 NULL NULL 1970-01-01 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true --51 NULL -1071480828 -1401575336 -51.0 NULL -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true -8 NULL -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true -NULL -947 -1070551679 1864027286 NULL -947.0 NULL NULL 1970-01-01 iUR3Q iUR3Q 4KWs6gw7lv2WYd66P false -11 NULL -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true -1 2 3 4 3.14 6.28 5.99 NULL 2014-09-01 its a beautiful day in the neighbhorhood a beautiful day for a neighbor wont you be mine true +11 NULL -1073279343 -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true +NULL -7382 -1073051226 -1073051226 -1887561756 NULL -7382.0 NULL NULL 1969-12-31 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false +11 NULL -1072910839 -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false +NULL 8373 -1072081801 -1072081801 1864027286 NULL 8373.0 NULL NULL 1969-12-31 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true +NULL -5470 -1072076362 -1072076362 1864027286 NULL -5470.0 NULL NULL 1969-12-31 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true +-51 NULL -1071480828 -1071480828 -1401575336 -51.0 NULL -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true +8 NULL -1071363017 -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true +1 2 -1070883071 3 4 3.14 6.28 5.99 NULL 2014-09-01 its a beautiful day in the neighbhorhood a beautiful day for a neighbor wont you be mine true +NULL -947 -1070551679 -1070551679 1864027286 NULL -947.0 NULL NULL 1969-12-31 iUR3Q iUR3Q 4KWs6gw7lv2WYd66P false +11 NULL -1069736047 -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true PREHOOK: query: update acid_uat set ti = ti * 2, si = cast(f as int), @@ -175,13 +180,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_uat #### A masked pattern was here #### -11 NULL -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true -NULL -7382 -1073051226 -1887561756 NULL -7382.0 NULL NULL 1970-01-01 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false -11 NULL -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false -NULL 8373 -1072081801 1864027286 NULL 8373.0 NULL NULL 1970-01-01 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true -NULL -5470 -1072076362 1864027286 NULL -5470.0 NULL NULL 1970-01-01 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true --102 -51 -1071480828 -1401575336 -51.0 -51.0 -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true -8 NULL -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true -NULL -947 -1070551679 1864027286 NULL -947.0 NULL NULL 1970-01-01 iUR3Q iUR3Q 4KWs6gw7lv2WYd66P false -11 NULL -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true -1 2 3 4 3.14 6.28 5.99 NULL 2014-09-01 its a beautiful day in the neighbhorhood a 
beautiful day for a neighbor wont you be mine true +11 NULL -1073279343 -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true +NULL -7382 -1073051226 -1073051226 -1887561756 NULL -7382.0 NULL NULL 1969-12-31 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false +11 NULL -1072910839 -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false +NULL 8373 -1072081801 -1072081801 1864027286 NULL 8373.0 NULL NULL 1969-12-31 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true +NULL -5470 -1072076362 -1072076362 1864027286 NULL -5470.0 NULL NULL 1969-12-31 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true +-102 -51 -1071480828 -1071480828 -1401575336 -51.0 -51.0 -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true +8 NULL -1071363017 -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true +1 2 -1070883071 3 4 3.14 6.28 5.99 NULL 2014-09-01 its a beautiful day in the neighbhorhood a beautiful day for a neighbor wont you be mine true +NULL -947 -1070551679 -1070551679 1864027286 NULL -947.0 NULL NULL 1969-12-31 iUR3Q iUR3Q 4KWs6gw7lv2WYd66P false +11 NULL -1069736047 -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true Index: ql/src/test/results/clientpositive/tez/update_tmp_table.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/update_tmp_table.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/update_tmp_table.q.out (working copy) @@ -34,11 +34,11 @@ -1070883071 0ruyd6Y50JpdGRf6HqD -1070551679 iUR3Q -1069736047 k17Am8uPHWk02cEf1jet -PREHOOK: query: update acid_utt set b = 'fred' where b = '0ruyd6Y50JpdGRf6HqD' +PREHOOK: query: update acid_utt set a = 'fred' where b = '0ruyd6Y50JpdGRf6HqD' PREHOOK: type: QUERY PREHOOK: Input: default@acid_utt PREHOOK: Output: default@acid_utt -POSTHOOK: query: update acid_utt set b = 'fred' where b = '0ruyd6Y50JpdGRf6HqD' +POSTHOOK: query: update acid_utt set a = 'fred' where b = '0ruyd6Y50JpdGRf6HqD' POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_utt POSTHOOK: Output: default@acid_utt @@ -50,6 +50,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_utt #### A masked pattern was here #### +NULL 0ruyd6Y50JpdGRf6HqD -1073279343 oj1YrV5Wa -1073051226 A34p7oRr2WvUJNf -1072910839 0iqrc5 @@ -57,6 +58,5 @@ -1072076362 2uLyD28144vklju213J1mr -1071480828 aw724t8c5558x2xneC624 -1071363017 Anj0oF --1070883071 fred -1070551679 iUR3Q -1069736047 k17Am8uPHWk02cEf1jet Index: ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/vector_aggregate_9.q.out (working copy) @@ -144,17 +144,13 @@ mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: decimal(38,18)) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: 
Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -170,4 +166,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vectortab2korc #### A masked pattern was here #### --4997414117561.546875 4994550248722.298828 -10252745435816.024410 -5399023399.587163986308583465 +-4997414117561.546875 4994550248722.298828 -10252745435816.02441 -5399023399.587163986308583465 Index: ql/src/test/results/clientpositive/tez/vector_between_in.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vector_between_in.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/vector_between_in.q.out (working copy) @@ -495,40 +495,40 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 1970-01-01 1970-01-01 1970-01-01 1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 PREHOOK: query: SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate PREHOOK: type: QUERY PREHOOK: Input: default@decimal_date_test @@ -537,2949 +537,2949 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### -1678-01-21 -1678-01-24 -1678-02-04 -1678-03-20 -1678-04-12 -1678-08-17 -1678-11-01 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-10-13 -1680-12-14 -1681-02-27 -1681-06-10 -1681-10-21 -1682-03-29 +1678-01-20 +1678-01-23 +1678-02-03 +1678-03-19 +1678-04-11 +1678-08-16 +1678-10-31 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-10-12 +1680-12-13 +1681-02-26 +1681-06-09 +1681-10-20 +1682-03-28 +1682-05-05 1682-05-06 -1682-05-07 -1682-11-12 -1682-11-12 -1682-11-12 -1682-11-12 -1682-11-12 -1682-11-12 -1683-03-27 -1683-06-08 -1683-06-08 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-07-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1684-06-28 -1684-07-23 -1684-09-10 -1684-09-12 -1684-11-13 -1684-11-22 -1685-01-28 -1685-04-10 -1685-04-10 -1685-04-10 
-1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-05-26 -1685-06-21 -1685-08-02 -1685-12-09 -1686-02-01 +1682-11-11 +1682-11-11 +1682-11-11 +1682-11-11 +1682-11-11 +1682-11-11 +1683-03-26 +1683-06-07 +1683-06-07 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-07-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1684-06-27 +1684-07-22 +1684-09-09 +1684-09-11 +1684-11-12 +1684-11-21 +1685-01-27 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-05-25 +1685-06-20 +1685-08-01 +1685-12-08 +1686-01-31 +1686-03-20 1686-03-21 -1686-03-22 -1686-04-02 -1686-11-30 -1686-12-03 -1686-12-03 -1686-12-03 -1686-12-03 -1686-12-03 -1686-12-03 -1687-02-02 -1687-03-05 -1687-03-24 -1687-05-24 -1687-07-16 -1687-09-29 -1687-10-25 -1687-11-07 -1687-11-19 -1687-12-17 -1688-07-24 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-08-03 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-12-30 -1689-01-31 -1689-09-24 -1689-10-30 -1690-01-28 -1690-03-13 -1690-03-20 -1690-05-28 -1690-09-21 -1690-10-01 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1691-02-10 -1691-07-18 -1691-08-09 -1691-11-14 -1691-11-14 -1691-11-14 -1691-11-14 -1691-11-14 -1691-11-14 -1691-11-14 -1692-05-13 -1692-11-14 -1693-03-18 -1693-07-16 -1693-12-17 -1693-12-27 -1694-06-30 -1694-08-05 -1694-11-18 -1694-11-18 -1694-11-18 -1695-05-20 -1695-10-19 -1695-11-04 -1695-11-11 -1696-02-17 -1696-02-27 -1696-03-24 -1696-04-09 -1696-05-03 -1696-05-18 -1696-05-28 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-26 -1696-11-04 -1696-12-17 -1697-01-25 -1697-05-02 -1697-05-28 -1697-08-30 -1697-10-13 -1697-12-10 -1698-03-02 -1698-04-29 -1698-05-06 -1698-05-26 -1698-05-26 -1698-06-12 -1698-08-09 -1698-11-25 -1698-12-10 -1698-12-10 -1698-12-10 -1699-03-10 -1699-03-14 -1699-04-18 -1699-09-28 -1700-02-27 -1700-04-11 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-05-01 -1700-05-18 -1700-05-18 -1700-05-18 -1700-05-18 -1700-05-18 -1700-06-24 -1700-08-09 -1700-08-14 -1700-09-23 -1700-11-20 -1700-11-24 -1701-02-18 -1701-03-21 -1701-05-30 -1701-07-24 +1686-04-01 +1686-11-29 +1686-12-02 +1686-12-02 +1686-12-02 +1686-12-02 +1686-12-02 +1686-12-02 +1687-02-01 +1687-03-04 +1687-03-23 +1687-05-23 +1687-07-15 +1687-09-28 +1687-10-24 +1687-11-06 +1687-11-18 +1687-12-16 +1688-07-23 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-08-02 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-12-29 +1689-01-30 +1689-09-23 +1689-10-29 +1690-01-27 +1690-03-12 +1690-03-19 +1690-05-27 +1690-09-20 +1690-09-30 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1691-02-09 +1691-07-17 +1691-08-08 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1692-05-12 +1692-11-13 +1693-03-17 +1693-07-15 +1693-12-16 +1693-12-26 +1694-06-29 +1694-08-04 +1694-11-17 +1694-11-17 +1694-11-17 +1695-05-19 +1695-10-18 +1695-11-03 
+1695-11-10 +1696-02-16 +1696-02-26 +1696-03-23 +1696-04-08 +1696-05-02 +1696-05-17 +1696-05-27 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-25 +1696-11-03 +1696-12-16 +1697-01-24 +1697-05-01 +1697-05-27 +1697-08-29 +1697-10-12 +1697-12-09 +1698-03-01 +1698-04-28 +1698-05-05 +1698-05-25 +1698-05-25 +1698-06-11 +1698-08-08 +1698-11-24 +1698-12-09 +1698-12-09 +1698-12-09 +1699-03-09 +1699-03-13 +1699-04-17 +1699-09-27 +1700-02-26 +1700-04-10 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-30 +1700-05-17 +1700-05-17 +1700-05-17 +1700-05-17 +1700-05-17 +1700-06-23 +1700-08-08 +1700-08-13 +1700-09-22 +1700-11-19 +1700-11-23 +1701-02-17 +1701-03-20 +1701-05-29 +1701-07-23 +1701-07-29 1701-07-30 -1701-07-31 -1701-09-19 -1701-10-25 -1701-11-03 -1702-01-09 -1702-01-09 -1702-01-09 -1702-01-09 -1702-05-16 -1702-06-04 -1702-07-24 -1702-10-04 -1703-01-30 -1703-02-13 -1703-02-21 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-06-16 -1703-08-18 +1701-09-18 +1701-10-24 +1701-11-02 +1702-01-08 +1702-01-08 +1702-01-08 +1702-01-08 +1702-05-15 +1702-06-03 +1702-07-23 +1702-10-03 +1703-01-29 +1703-02-12 +1703-02-20 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-06-15 +1703-08-17 +1703-09-02 +1703-09-02 +1703-09-02 +1703-09-02 +1703-09-02 1703-09-03 -1703-09-03 -1703-09-03 -1703-09-03 -1703-09-03 -1703-09-04 -1703-09-20 -1703-10-27 -1704-01-23 -1704-08-07 -1704-08-15 -1704-08-20 -1704-09-26 -1704-09-26 -1704-09-26 -1704-09-26 -1704-09-26 -1704-11-24 -1704-12-22 -1705-02-23 -1705-04-18 -1705-04-26 -1705-04-26 -1705-04-26 -1705-04-26 -1705-06-09 -1705-08-06 -1705-12-04 -1706-01-11 -1706-02-13 -1706-06-11 -1706-06-11 -1706-06-21 -1706-06-23 -1706-06-25 -1706-07-13 -1706-07-24 -1706-08-08 -1706-08-11 -1706-09-01 -1706-09-25 -1706-11-15 -1706-12-01 +1703-09-19 +1703-10-26 +1704-01-22 +1704-08-06 +1704-08-14 +1704-08-19 +1704-09-25 +1704-09-25 +1704-09-25 +1704-09-25 +1704-09-25 +1704-11-23 +1704-12-21 +1705-02-22 +1705-04-17 +1705-04-25 +1705-04-25 +1705-04-25 +1705-04-25 +1705-06-08 +1705-08-05 +1705-12-03 +1706-01-10 +1706-02-12 +1706-06-10 +1706-06-10 +1706-06-20 +1706-06-22 +1706-06-24 +1706-07-12 +1706-07-23 +1706-08-07 +1706-08-10 +1706-08-31 +1706-09-24 +1706-11-14 +1706-11-30 +1706-12-23 1706-12-24 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1707-03-05 -1707-04-17 -1707-05-06 -1707-11-23 -1708-02-13 -1708-04-14 -1708-06-25 -1708-07-22 -1708-08-06 -1708-09-22 -1708-10-16 -1708-11-05 -1708-12-31 -1709-03-02 -1709-04-22 -1709-05-15 -1709-09-16 -1710-01-08 -1710-04-30 -1710-05-29 -1710-06-22 -1710-08-02 -1710-09-09 -1710-09-30 -1710-11-25 -1711-01-13 -1711-05-11 -1711-05-24 -1711-08-05 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-10-20 -1711-12-05 -1712-02-02 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1707-03-04 +1707-04-16 +1707-05-05 +1707-11-22 +1708-02-12 +1708-04-13 +1708-06-24 +1708-07-21 +1708-08-05 +1708-09-21 +1708-10-15 +1708-11-04 +1708-12-30 +1709-03-01 +1709-04-21 +1709-05-14 +1709-09-15 +1710-01-07 +1710-04-29 +1710-05-28 
+1710-06-21 +1710-08-01 +1710-09-08 +1710-09-29 +1710-11-24 +1711-01-12 +1711-05-10 +1711-05-23 +1711-08-04 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-10-19 +1711-12-04 +1712-02-01 +1712-03-22 1712-03-23 -1712-03-24 -1712-03-26 -1712-05-14 -1712-10-10 -1712-10-10 -1712-10-10 -1712-10-10 -1712-10-10 -1712-12-19 -1713-02-24 -1713-06-02 -1713-06-22 -1713-07-06 -1713-08-19 -1713-08-24 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-27 -1714-12-17 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-03-05 -1715-03-09 -1715-06-22 -1715-07-25 -1715-09-20 -1715-11-10 -1716-05-30 -1716-06-03 -1716-06-07 -1716-07-19 +1712-03-25 +1712-05-13 +1712-10-09 +1712-10-09 +1712-10-09 +1712-10-09 +1712-10-09 +1712-12-18 +1713-02-23 +1713-06-01 +1713-06-21 +1713-07-05 +1713-08-18 +1713-08-23 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-10-26 +1714-12-16 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-03-04 +1715-03-08 +1715-06-21 +1715-07-24 +1715-09-19 +1715-11-09 +1716-05-29 +1716-06-02 +1716-06-06 +1716-07-18 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-16 -1717-02-18 -1717-07-30 -1717-10-18 -1717-11-06 -1717-12-01 -1717-12-15 -1717-12-26 -1717-12-31 -1718-02-08 -1718-02-25 -1718-03-12 -1718-03-28 -1718-09-09 -1718-10-10 -1718-10-10 -1718-10-10 -1718-10-10 -1718-10-10 -1718-10-10 -1718-11-12 -1718-11-15 -1718-12-29 -1719-04-17 -1719-04-22 -1719-05-16 -1719-08-24 -1719-09-22 -1719-12-09 -1719-12-26 -1720-03-02 -1720-03-02 -1720-03-02 -1720-03-02 -1720-03-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-09-05 -1720-10-24 -1720-10-24 -1720-10-24 -1720-10-24 -1720-10-24 -1721-01-23 -1721-03-13 -1721-04-30 -1721-05-14 -1721-06-07 -1721-06-19 -1721-07-12 -1721-08-18 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-02-10 +1717-02-17 +1717-07-29 +1717-10-17 +1717-11-05 +1717-11-30 +1717-12-14 +1717-12-25 +1717-12-30 +1718-02-07 +1718-02-24 +1718-03-11 +1718-03-27 +1718-09-08 +1718-10-09 +1718-10-09 +1718-10-09 +1718-10-09 +1718-10-09 +1718-10-09 +1718-11-11 +1718-11-14 +1718-12-28 +1719-04-16 +1719-04-21 +1719-05-15 +1719-08-23 +1719-09-21 +1719-12-08 +1719-12-25 +1720-03-01 +1720-03-01 +1720-03-01 +1720-03-01 +1720-03-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-09-04 +1720-10-23 +1720-10-23 +1720-10-23 +1720-10-23 +1720-10-23 +1721-01-22 +1721-03-12 +1721-04-29 +1721-05-13 +1721-06-06 +1721-06-18 +1721-07-11 +1721-08-17 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-02-09 +1722-02-24 1722-02-25 -1722-02-26 -1722-06-14 -1722-07-15 -1722-10-01 -1722-12-01 -1722-12-14 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-03-04 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-05-19 -1723-06-21 -1723-07-24 -1723-08-02 -1723-10-30 -1723-12-01 -1724-01-26 +1722-06-13 +1722-07-14 +1722-09-30 +1722-11-30 
+1722-12-13 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-03-03 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-05-18 +1723-06-20 +1723-07-23 +1723-08-01 +1723-10-29 +1723-11-30 +1724-01-25 +1724-03-26 1724-03-27 -1724-03-28 -1724-04-15 -1724-05-20 -1724-07-11 -1724-08-23 -1724-10-09 -1724-12-07 -1725-03-19 -1725-06-01 -1725-08-04 -1726-04-02 -1726-07-06 -1726-07-06 -1726-07-21 -1726-10-31 -1727-06-18 -1727-07-14 -1727-07-23 -1727-07-26 -1727-08-22 -1728-02-15 -1728-03-07 -1728-09-19 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-10 -1728-12-17 -1729-04-09 -1729-04-30 -1729-05-09 -1729-06-23 -1729-08-14 -1729-08-29 -1729-11-07 -1729-12-13 -1730-02-03 -1730-02-12 -1730-04-16 -1730-05-14 -1730-08-27 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-04-14 -1731-06-28 -1731-08-09 -1731-08-09 -1731-08-09 -1731-08-09 -1731-10-07 -1731-10-31 -1732-01-21 -1732-01-27 -1732-02-08 -1732-02-19 -1732-02-23 -1732-03-05 -1732-04-27 -1732-06-26 -1732-07-11 -1732-07-22 -1732-08-31 -1732-12-01 -1733-06-23 -1733-09-04 -1733-09-08 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1734-02-22 -1734-03-02 -1734-03-20 -1734-06-16 -1734-07-03 +1724-04-14 +1724-05-19 +1724-07-10 +1724-08-22 +1724-10-08 +1724-12-06 +1725-03-18 +1725-05-31 +1725-08-03 +1726-04-01 +1726-07-05 +1726-07-05 +1726-07-20 +1726-10-30 +1727-06-17 +1727-07-13 +1727-07-22 +1727-07-25 +1727-08-21 +1728-02-14 +1728-03-06 +1728-09-18 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-09 +1728-12-16 +1729-04-08 +1729-04-29 +1729-05-08 +1729-06-22 +1729-08-13 +1729-08-28 +1729-11-06 +1729-12-12 +1730-02-02 +1730-02-11 +1730-04-15 +1730-05-13 +1730-08-26 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-04-13 +1731-06-27 +1731-08-08 +1731-08-08 +1731-08-08 +1731-08-08 +1731-10-06 +1731-10-30 +1732-01-20 +1732-01-26 +1732-02-07 +1732-02-18 +1732-02-22 +1732-03-04 +1732-04-26 +1732-06-25 +1732-07-10 +1732-07-21 +1732-08-30 +1732-11-30 +1733-06-22 +1733-09-03 +1733-09-07 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1734-02-21 +1734-03-01 +1734-03-19 +1734-06-15 +1734-07-02 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-14 -1734-10-24 -1734-12-10 -1735-01-31 -1735-02-11 -1735-02-15 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-09-04 -1735-09-16 -1735-09-28 -1735-11-29 -1735-12-04 -1735-12-12 -1736-04-13 -1736-04-28 -1736-06-24 -1736-09-28 -1736-11-14 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1737-02-23 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-05-15 -1737-06-28 -1737-06-30 -1737-07-05 -1737-07-17 +1734-10-23 +1734-12-09 +1735-01-30 +1735-02-10 +1735-02-14 +1735-07-09 +1735-07-09 +1735-07-09 +1735-07-09 +1735-07-09 +1735-07-09 +1735-07-09 +1735-09-03 +1735-09-15 +1735-09-27 +1735-11-28 +1735-12-03 
+1735-12-11 +1736-04-12 +1736-04-27 +1736-06-23 +1736-09-27 +1736-11-13 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1737-02-22 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-05-14 +1737-06-27 +1737-06-29 +1737-07-04 +1737-07-16 +1737-08-01 1737-08-02 -1737-08-03 -1737-11-06 -1737-12-09 -1738-01-25 -1738-04-05 -1738-06-01 -1738-06-05 -1738-10-25 -1738-10-25 -1738-10-25 -1738-10-25 -1738-10-25 -1738-10-25 -1739-02-11 -1739-02-19 -1739-02-19 -1739-02-19 -1739-02-19 -1739-02-28 -1739-07-05 -1739-09-04 -1740-01-10 -1740-01-13 -1740-01-13 -1740-01-13 -1740-01-13 -1740-02-07 -1740-03-23 +1737-11-05 +1737-12-08 +1738-01-24 +1738-04-04 +1738-05-31 +1738-06-04 +1738-10-24 +1738-10-24 +1738-10-24 +1738-10-24 +1738-10-24 +1738-10-24 +1739-02-10 +1739-02-18 +1739-02-18 +1739-02-18 +1739-02-18 +1739-02-27 +1739-07-04 +1739-09-03 +1740-01-09 +1740-01-12 +1740-01-12 +1740-01-12 +1740-01-12 +1740-02-06 +1740-03-22 +1740-04-18 1740-04-19 -1740-04-20 -1740-07-13 -1740-11-24 -1740-11-28 -1741-04-14 -1741-06-02 -1741-08-16 -1741-08-27 -1741-09-11 -1741-11-26 -1741-11-26 -1741-12-31 -1742-06-07 -1742-12-09 -1742-12-18 -1742-12-26 -1743-01-11 -1743-01-16 -1743-01-16 -1743-01-16 -1743-01-20 -1743-02-03 -1743-02-10 -1743-12-14 -1744-01-03 -1744-04-14 -1744-09-14 -1744-09-19 -1744-09-24 -1744-12-05 -1744-12-05 -1744-12-05 -1744-12-05 -1745-02-09 -1745-03-15 -1745-05-13 -1745-06-13 -1745-08-21 -1745-10-28 -1745-10-30 -1746-01-20 -1746-01-26 -1746-02-16 -1746-03-18 -1746-07-28 -1746-09-18 -1746-10-02 -1746-12-21 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-04-16 -1747-06-09 -1747-07-29 -1747-09-24 -1747-11-08 -1747-11-27 -1748-03-21 -1748-04-05 -1748-04-14 -1748-04-25 -1748-08-09 -1749-06-27 -1749-10-24 -1750-04-20 -1750-04-27 -1750-05-29 -1750-07-04 -1750-08-01 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-19 -1750-11-22 -1750-12-23 -1750-12-26 -1751-03-01 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-08-22 -1751-12-04 -1751-12-07 -1751-12-25 -1752-03-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-24 -1752-06-06 -1752-08-14 -1752-12-19 -1752-12-19 -1752-12-19 -1752-12-19 -1752-12-19 -1753-03-01 -1753-03-17 -1753-04-12 -1753-07-10 -1753-07-31 -1753-08-26 -1753-09-09 -1753-10-16 -1753-11-23 -1753-11-26 -1753-12-01 -1753-12-01 -1753-12-01 -1753-12-01 -1753-12-01 -1753-12-01 -1754-04-01 -1754-04-21 -1754-05-29 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-07-20 -1754-08-20 -1754-09-04 -1754-12-03 -1755-01-08 -1755-01-11 -1755-02-22 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-05-13 -1755-07-25 -1755-07-25 -1755-07-25 -1755-08-30 -1755-09-03 -1755-09-21 -1755-11-19 -1755-12-17 -1756-08-20 -1756-10-24 -1756-11-03 -1757-02-22 -1757-08-07 -1757-09-17 -1757-10-20 -1757-11-10 -1758-05-14 -1758-05-17 -1758-08-11 -1759-01-22 -1759-02-19 -1759-03-05 -1759-03-05 -1759-03-12 -1759-03-20 -1759-04-27 -1759-05-08 -1759-08-02 -1759-08-10 -1759-09-25 -1759-11-10 -1759-11-25 -1759-11-25 -1759-11-25 -1759-11-25 -1759-11-25 -1760-01-11 -1760-03-03 -1760-03-28 -1760-04-17 -1760-09-11 
-1761-01-02 -1761-01-02 -1761-01-02 -1761-01-02 -1761-01-10 -1761-06-23 -1761-08-17 -1761-09-30 -1761-11-14 -1761-11-16 -1761-12-02 -1762-05-04 -1762-05-19 -1762-08-28 -1762-11-28 -1762-11-28 -1762-11-28 -1762-11-28 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-05-19 -1763-07-17 -1763-07-25 -1763-07-29 -1763-08-05 -1763-12-30 -1764-02-05 -1764-04-29 -1764-07-22 -1764-09-30 -1764-12-01 -1765-01-28 -1765-06-30 -1765-08-22 -1765-11-12 -1766-03-15 -1766-07-23 -1766-09-24 -1766-12-14 -1767-03-25 -1767-04-22 -1767-05-09 -1767-05-16 -1767-05-16 -1767-05-16 -1767-08-10 -1767-11-01 -1767-11-15 -1768-02-02 -1768-06-19 -1768-10-31 -1768-12-22 -1768-12-22 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-02-02 -1769-07-28 -1769-08-20 -1770-03-04 -1770-03-08 -1770-03-21 -1770-08-02 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-10-02 -1770-10-25 -1771-04-05 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-07-01 -1772-03-12 -1772-04-24 -1772-04-24 -1772-04-24 -1772-04-24 -1772-04-24 -1772-04-24 -1772-05-27 -1772-09-13 -1772-09-15 -1772-12-24 -1772-12-30 -1773-04-08 -1773-06-07 -1773-11-16 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-19 -1774-06-15 -1774-07-06 -1774-08-01 -1774-11-02 -1775-04-15 -1775-06-04 -1775-06-16 -1775-07-16 +1740-07-12 +1740-11-23 +1740-11-27 +1741-04-13 +1741-06-01 +1741-08-15 +1741-08-26 +1741-09-10 +1741-11-25 +1741-11-25 +1741-12-30 +1742-06-06 +1742-12-08 +1742-12-17 +1742-12-25 +1743-01-10 +1743-01-15 +1743-01-15 +1743-01-15 +1743-01-19 +1743-02-02 +1743-02-09 +1743-12-13 +1744-01-02 +1744-04-13 +1744-09-13 +1744-09-18 +1744-09-23 +1744-12-04 +1744-12-04 +1744-12-04 +1744-12-04 +1745-02-08 +1745-03-14 +1745-05-12 +1745-06-12 +1745-08-20 +1745-10-27 +1745-10-29 +1746-01-19 +1746-01-25 +1746-02-15 +1746-03-17 +1746-07-27 +1746-09-17 +1746-10-01 +1746-12-20 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-04-15 +1747-06-08 +1747-07-28 +1747-09-23 +1747-11-07 +1747-11-26 +1748-03-20 +1748-04-04 +1748-04-13 +1748-04-24 +1748-08-08 +1749-06-26 +1749-10-23 +1750-04-19 +1750-04-26 +1750-05-28 +1750-07-03 +1750-07-31 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-18 +1750-11-21 +1750-12-22 +1750-12-25 +1751-02-28 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-08-21 +1751-12-03 +1751-12-06 +1751-12-24 +1752-03-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-23 +1752-06-05 +1752-08-13 +1752-12-18 +1752-12-18 +1752-12-18 +1752-12-18 +1752-12-18 +1753-02-28 +1753-03-16 +1753-04-11 +1753-07-09 +1753-07-30 +1753-08-25 +1753-09-08 +1753-10-15 +1753-11-22 +1753-11-25 +1753-11-30 +1753-11-30 +1753-11-30 +1753-11-30 +1753-11-30 +1753-11-30 +1754-03-31 +1754-04-20 +1754-05-28 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 
+1754-06-24 +1754-07-19 +1754-08-19 +1754-09-03 +1754-12-02 +1755-01-07 +1755-01-10 +1755-02-21 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-05-12 +1755-07-24 +1755-07-24 +1755-07-24 +1755-08-29 +1755-09-02 +1755-09-20 +1755-11-18 +1755-12-16 +1756-08-19 +1756-10-23 +1756-11-02 +1757-02-21 +1757-08-06 +1757-09-16 +1757-10-19 +1757-11-09 +1758-05-13 +1758-05-16 +1758-08-10 +1759-01-21 +1759-02-18 +1759-03-04 +1759-03-04 +1759-03-11 +1759-03-19 +1759-04-26 +1759-05-07 +1759-08-01 +1759-08-09 +1759-09-24 +1759-11-09 +1759-11-24 +1759-11-24 +1759-11-24 +1759-11-24 +1759-11-24 +1760-01-10 +1760-03-02 +1760-03-27 +1760-04-16 +1760-09-10 +1761-01-01 +1761-01-01 +1761-01-01 +1761-01-01 +1761-01-09 +1761-06-22 +1761-08-16 +1761-09-29 +1761-11-13 +1761-11-15 +1761-12-01 +1762-05-03 +1762-05-18 +1762-08-27 +1762-11-27 +1762-11-27 +1762-11-27 +1762-11-27 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-05-18 +1763-07-16 +1763-07-24 +1763-07-28 +1763-08-04 +1763-12-29 +1764-02-04 +1764-04-28 +1764-07-21 +1764-09-29 +1764-11-30 +1765-01-27 +1765-06-29 +1765-08-21 +1765-11-11 +1766-03-14 +1766-07-22 +1766-09-23 +1766-12-13 +1767-03-24 +1767-04-21 +1767-05-08 +1767-05-15 +1767-05-15 +1767-05-15 +1767-08-09 +1767-10-31 +1767-11-14 +1768-02-01 +1768-06-18 +1768-10-30 +1768-12-21 +1768-12-21 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-02-01 +1769-07-27 +1769-08-19 +1770-03-03 +1770-03-07 +1770-03-20 +1770-08-01 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-10-01 +1770-10-24 +1771-04-04 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-06-30 +1772-03-11 +1772-04-23 +1772-04-23 +1772-04-23 +1772-04-23 +1772-04-23 +1772-04-23 +1772-05-26 +1772-09-12 +1772-09-14 +1772-12-23 +1772-12-29 +1773-04-07 +1773-06-06 +1773-11-15 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-18 +1774-06-14 +1774-07-05 +1774-07-31 +1774-11-01 +1775-04-14 +1775-06-03 +1775-06-15 +1775-07-15 +1776-01-28 1776-01-29 -1776-01-30 -1776-09-18 -1776-09-26 -1776-10-09 -1776-10-15 -1776-12-06 -1777-01-28 -1777-01-30 -1777-01-30 -1777-01-30 -1777-01-30 -1777-01-30 -1777-01-30 -1777-04-04 -1777-04-16 -1777-05-26 -1777-06-05 -1778-01-09 -1778-04-25 -1779-01-11 -1779-04-02 -1779-04-10 -1779-04-29 -1779-04-29 -1779-04-29 -1779-04-29 -1779-04-29 -1779-08-02 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1780-02-01 -1780-12-11 -1781-02-13 -1781-08-19 -1781-10-10 -1781-11-20 -1782-02-08 -1782-05-17 -1782-06-06 -1782-06-09 -1782-06-20 -1782-07-04 -1782-10-04 -1782-10-10 -1782-10-10 -1782-10-10 -1782-10-10 -1782-10-10 -1782-11-04 -1783-01-15 -1783-05-14 -1783-07-16 -1783-07-16 -1783-07-24 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-29 -1784-04-12 -1784-05-09 -1785-01-06 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-05-22 -1785-06-09 -1785-06-19 -1785-08-29 -1785-09-30 
-1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1786-03-19 -1786-06-08 -1786-08-06 -1786-08-29 -1786-09-23 -1786-09-29 -1786-10-06 -1787-01-28 -1787-02-23 -1787-04-24 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-31 -1787-05-31 -1787-05-31 -1787-05-31 -1787-05-31 -1787-05-31 -1787-06-28 -1787-07-07 -1787-08-02 -1787-10-06 -1787-10-19 -1787-10-24 -1787-11-11 -1787-12-19 +1776-09-17 +1776-09-25 +1776-10-08 +1776-10-14 +1776-12-05 +1777-01-27 +1777-01-29 +1777-01-29 +1777-01-29 +1777-01-29 +1777-01-29 +1777-01-29 +1777-04-03 +1777-04-15 +1777-05-25 +1777-06-04 +1778-01-08 +1778-04-24 +1779-01-10 +1779-04-01 +1779-04-09 +1779-04-28 +1779-04-28 +1779-04-28 +1779-04-28 +1779-04-28 +1779-08-01 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1780-01-31 +1780-12-10 +1781-02-12 +1781-08-18 +1781-10-09 +1781-11-19 +1782-02-07 +1782-05-16 +1782-06-05 +1782-06-08 +1782-06-19 +1782-07-03 +1782-10-03 +1782-10-09 +1782-10-09 +1782-10-09 +1782-10-09 +1782-10-09 +1782-11-03 +1783-01-14 +1783-05-13 +1783-07-15 +1783-07-15 +1783-07-23 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-28 +1784-04-11 +1784-05-08 +1785-01-05 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-05-21 +1785-06-08 +1785-06-18 +1785-08-28 +1785-09-29 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1786-03-18 +1786-06-07 +1786-08-05 +1786-08-28 +1786-09-22 +1786-09-28 +1786-10-05 +1787-01-27 +1787-02-22 +1787-04-23 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-30 +1787-05-30 +1787-05-30 +1787-05-30 +1787-05-30 +1787-05-30 +1787-06-27 +1787-07-06 +1787-08-01 +1787-10-05 +1787-10-18 +1787-10-23 +1787-11-10 +1787-12-18 +1788-04-05 1788-04-06 -1788-04-07 -1788-07-04 -1788-08-06 -1789-01-22 -1789-01-22 -1789-01-22 -1789-02-08 -1789-04-18 -1789-05-10 -1789-08-10 -1790-01-30 -1790-04-19 -1790-10-10 -1791-01-24 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-07-20 -1791-10-04 -1792-06-16 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-08-20 -1793-08-22 -1793-11-19 -1794-03-19 -1794-03-19 -1794-03-19 -1794-03-19 -1794-03-19 -1794-04-16 -1794-04-20 -1794-05-15 -1794-07-03 -1794-08-13 -1794-09-02 -1794-09-24 -1794-10-16 -1794-11-02 -1794-11-14 -1795-01-17 -1795-03-09 -1795-05-27 -1795-05-27 -1795-05-27 -1795-05-27 -1795-05-27 +1788-07-03 +1788-08-05 +1789-01-21 +1789-01-21 +1789-01-21 +1789-02-07 +1789-04-17 +1789-05-09 +1789-08-09 +1790-01-29 +1790-04-18 +1790-10-09 +1791-01-23 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-07-19 +1791-10-03 +1792-06-15 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-08-19 +1793-08-21 +1793-11-18 +1794-03-18 +1794-03-18 +1794-03-18 +1794-03-18 +1794-03-18 +1794-04-15 +1794-04-19 +1794-05-14 +1794-07-02 +1794-08-12 +1794-09-01 +1794-09-23 +1794-10-15 +1794-11-01 +1794-11-13 +1795-01-16 
+1795-03-08 +1795-05-26 +1795-05-26 +1795-05-26 +1795-05-26 +1795-05-26 +1797-01-03 1797-01-04 -1797-01-05 -1797-09-29 -1797-10-01 -1798-02-14 -1798-09-28 -1799-04-29 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-12-02 -1800-03-30 -1800-04-05 -1800-04-23 -1800-09-04 -1800-10-01 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-11-25 -1801-12-15 -1802-03-30 -1802-04-11 -1802-06-01 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-08-13 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-30 -1802-11-26 -1802-12-13 -1803-02-04 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-14 -1803-07-11 -1803-12-02 -1803-12-08 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-04 -1804-03-08 -1804-07-16 -1804-10-23 -1804-12-28 -1805-01-27 -1805-03-19 -1805-07-15 -1805-07-20 -1805-10-23 -1805-10-23 -1805-10-23 -1805-10-23 -1805-10-23 -1806-01-02 -1806-02-10 -1806-10-12 -1807-02-18 -1807-02-23 -1807-03-09 -1807-06-15 -1807-07-09 -1807-09-02 -1807-10-25 -1807-10-29 -1807-12-29 -1808-03-03 -1808-03-13 -1808-05-10 -1808-07-02 -1808-09-10 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-04-23 -1809-06-06 -1809-09-15 -1809-09-18 -1809-12-24 -1810-08-17 -1810-08-17 -1811-01-27 -1811-01-27 -1811-01-27 -1811-01-27 -1811-04-10 -1811-04-27 -1811-05-31 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-08-14 -1812-01-17 -1812-05-25 -1812-06-11 -1812-10-16 -1812-12-24 -1813-02-04 -1813-03-18 -1813-04-11 -1813-07-09 -1813-08-20 -1813-10-20 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-04-26 -1814-05-28 -1814-11-09 -1814-11-20 -1814-12-21 -1815-01-16 -1815-02-23 -1815-03-10 -1815-04-30 -1815-07-30 -1816-01-13 -1816-02-13 -1816-03-13 -1816-08-03 -1816-08-12 -1816-12-25 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-17 -1817-05-15 -1817-05-20 -1817-06-02 -1817-07-02 -1817-07-12 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 +1797-09-28 +1797-09-30 +1798-02-13 +1798-09-27 +1799-04-28 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-12-01 +1800-03-29 +1800-04-04 +1800-04-22 +1800-09-03 +1800-09-30 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-11-24 +1801-12-14 +1802-03-29 +1802-04-10 +1802-05-31 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-08-12 +1802-08-22 +1802-08-22 +1802-08-22 +1802-08-22 +1802-08-22 +1802-08-22 +1802-08-29 +1802-11-25 +1802-12-12 +1803-02-03 +1803-06-10 +1803-06-10 +1803-06-10 
+1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-13 +1803-07-10 +1803-12-01 +1803-12-07 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-03-03 +1804-03-07 +1804-07-15 +1804-10-22 +1804-12-27 +1805-01-26 +1805-03-18 +1805-07-14 +1805-07-19 +1805-10-22 +1805-10-22 +1805-10-22 +1805-10-22 +1805-10-22 +1806-01-01 +1806-02-09 +1806-10-11 +1807-02-17 +1807-02-22 +1807-03-08 +1807-06-14 +1807-07-08 +1807-09-01 +1807-10-24 +1807-10-28 +1807-12-28 +1808-03-02 +1808-03-12 +1808-05-09 +1808-07-01 +1808-09-09 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-04-22 +1809-06-05 +1809-09-14 +1809-09-17 +1809-12-23 +1810-08-16 +1810-08-16 +1811-01-26 +1811-01-26 +1811-01-26 +1811-01-26 +1811-04-09 +1811-04-26 +1811-05-30 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-08-13 +1812-01-16 +1812-05-24 +1812-06-10 +1812-10-15 +1812-12-23 +1813-02-03 +1813-03-17 +1813-04-10 +1813-07-08 +1813-08-19 +1813-10-19 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-04-25 +1814-05-27 +1814-11-08 +1814-11-19 +1814-12-20 +1815-01-15 +1815-02-22 +1815-03-09 +1815-04-29 +1815-07-29 +1816-01-12 +1816-02-12 +1816-03-12 +1816-08-02 +1816-08-11 +1816-12-24 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-16 +1817-05-14 +1817-05-19 +1817-06-01 +1817-07-01 +1817-07-11 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-10-14 1817-10-15 -1817-10-16 -1817-12-18 -1818-01-01 -1818-02-25 -1818-05-26 -1818-06-02 -1818-07-17 -1818-08-13 -1818-09-21 -1818-09-23 -1818-09-25 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-22 -1819-11-19 -1820-04-09 -1820-04-11 -1820-06-04 -1820-07-14 -1820-08-05 -1820-10-01 -1820-10-03 -1820-11-01 -1820-11-29 -1820-11-29 -1820-11-29 -1820-11-29 -1820-11-29 -1820-11-29 -1821-01-07 -1821-01-12 -1821-05-16 -1821-07-26 -1821-08-14 -1821-10-11 -1821-11-10 -1821-12-08 -1822-01-22 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-16 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-08-19 -1822-11-23 -1822-12-03 -1823-01-31 -1823-03-15 -1823-03-18 +1817-12-17 +1817-12-31 +1818-02-24 +1818-05-25 +1818-06-01 +1818-07-16 +1818-08-12 +1818-09-20 +1818-09-22 +1818-09-24 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-21 +1819-11-18 +1820-04-08 +1820-04-10 +1820-06-03 +1820-07-13 +1820-08-04 +1820-09-30 +1820-10-02 +1820-10-31 +1820-11-28 +1820-11-28 +1820-11-28 +1820-11-28 +1820-11-28 +1820-11-28 +1821-01-06 +1821-01-11 +1821-05-15 +1821-07-25 +1821-08-13 +1821-10-10 +1821-11-09 +1821-12-07 +1822-01-21 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-15 
+1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-08-18 +1822-11-22 +1822-12-02 +1823-01-30 +1823-03-14 +1823-03-17 +1823-05-12 1823-05-13 -1823-05-14 -1823-06-03 -1823-08-06 -1823-10-08 -1824-01-08 -1824-01-21 -1824-02-08 -1824-06-29 -1824-07-10 -1824-08-13 -1824-08-25 -1824-09-16 -1825-02-06 -1825-02-19 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-06-18 -1825-08-24 -1825-08-28 -1825-11-06 -1825-12-27 -1826-01-16 -1826-07-25 -1826-11-10 -1826-11-29 -1827-05-13 -1827-06-12 -1827-06-22 -1827-07-05 -1827-07-23 -1827-07-23 -1827-07-23 -1827-08-07 -1827-12-13 -1828-02-14 -1828-06-20 -1828-10-14 -1829-02-17 -1829-07-24 -1829-11-09 -1830-05-27 -1830-11-19 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1831-01-29 -1831-03-11 -1831-05-26 -1831-07-23 -1831-08-18 -1831-08-21 -1831-09-16 -1831-10-17 -1831-12-18 -1832-11-12 -1833-04-26 -1833-05-08 -1833-05-08 -1833-05-08 -1833-05-08 -1833-05-08 -1833-07-05 -1833-10-07 -1833-11-29 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-21 -1834-02-18 -1834-04-22 -1834-05-14 -1834-05-14 -1834-05-14 -1834-05-14 -1834-05-14 -1834-05-14 -1834-10-21 -1834-11-05 -1835-01-03 -1835-06-18 -1835-07-24 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-09-05 -1835-12-03 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-05-21 -1836-05-27 -1836-09-08 -1836-10-14 -1837-01-18 -1837-03-01 -1837-03-30 -1837-07-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-10-17 -1837-12-16 -1838-01-22 -1838-08-04 -1838-12-02 -1839-01-07 -1839-02-02 -1839-04-23 -1839-04-23 -1839-04-23 -1839-04-23 -1839-04-23 -1839-05-14 -1839-05-14 -1839-05-14 -1839-05-14 -1839-05-14 -1839-05-14 -1839-07-28 -1840-01-18 -1840-01-23 -1840-02-03 -1840-02-24 -1840-12-13 -1840-12-19 -1841-04-18 -1841-05-17 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-06-04 -1841-12-05 -1842-04-04 -1842-05-18 -1842-05-18 -1842-05-18 -1842-05-18 -1842-05-18 -1842-05-18 -1842-06-03 -1842-12-29 -1843-03-14 -1843-07-08 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-10-10 -1843-12-17 -1844-03-26 -1844-07-20 -1844-07-23 -1844-08-22 -1844-09-20 -1844-11-03 -1845-05-08 -1845-05-13 -1845-09-06 -1846-01-05 -1846-02-09 -1846-08-14 -1846-09-19 -1846-12-05 -1847-01-09 +1823-06-02 +1823-08-05 +1823-10-07 +1824-01-07 +1824-01-20 +1824-02-07 +1824-06-28 +1824-07-09 +1824-08-12 +1824-08-24 +1824-09-15 +1825-02-05 +1825-02-18 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-06-17 +1825-08-23 +1825-08-27 +1825-11-05 +1825-12-26 +1826-01-15 +1826-07-24 +1826-11-09 +1826-11-28 +1827-05-12 +1827-06-11 +1827-06-21 +1827-07-04 +1827-07-22 +1827-07-22 +1827-07-22 +1827-08-06 +1827-12-12 +1828-02-13 +1828-06-19 +1828-10-13 +1829-02-16 +1829-07-23 +1829-11-08 +1830-05-26 +1830-11-18 +1830-12-08 +1830-12-08 +1830-12-08 +1830-12-08 +1830-12-08 +1830-12-08 +1830-12-08 +1831-01-28 +1831-03-10 +1831-05-25 +1831-07-22 +1831-08-17 +1831-08-20 +1831-09-15 +1831-10-16 +1831-12-17 +1832-11-11 
+1833-04-25 +1833-05-07 +1833-05-07 +1833-05-07 +1833-05-07 +1833-05-07 +1833-07-04 +1833-10-06 +1833-11-28 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-20 +1834-02-17 +1834-04-21 +1834-05-13 +1834-05-13 +1834-05-13 +1834-05-13 +1834-05-13 +1834-05-13 +1834-10-20 +1834-11-04 +1835-01-02 +1835-06-17 +1835-07-23 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-09-04 +1835-12-02 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-05-20 +1836-05-26 +1836-09-07 +1836-10-13 +1837-01-17 +1837-02-28 +1837-03-29 +1837-07-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-10-16 +1837-12-15 +1838-01-21 +1838-08-03 +1838-12-01 +1839-01-06 +1839-02-01 +1839-04-22 +1839-04-22 +1839-04-22 +1839-04-22 +1839-04-22 +1839-05-13 +1839-05-13 +1839-05-13 +1839-05-13 +1839-05-13 +1839-05-13 +1839-07-27 +1840-01-17 +1840-01-22 +1840-02-02 +1840-02-23 +1840-12-12 +1840-12-18 +1841-04-17 +1841-05-16 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-06-03 +1841-12-04 +1842-04-03 +1842-05-17 +1842-05-17 +1842-05-17 +1842-05-17 +1842-05-17 +1842-05-17 +1842-06-02 +1842-12-28 +1843-03-13 +1843-07-07 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-10-09 +1843-12-16 +1844-03-25 +1844-07-19 +1844-07-22 +1844-08-21 +1844-09-19 +1844-11-02 +1845-05-07 +1845-05-12 +1845-09-05 +1846-01-04 +1846-02-08 +1846-08-13 +1846-09-18 +1846-12-04 +1847-01-08 +1847-02-24 1847-02-25 -1847-02-26 -1847-04-05 -1847-04-22 -1847-07-26 -1847-08-23 -1848-03-01 -1849-03-08 -1849-03-31 -1849-04-16 -1849-05-22 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-08-28 -1849-09-11 -1850-01-21 -1850-03-19 -1850-04-08 -1850-08-30 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-12-31 -1851-02-12 -1851-02-12 -1851-02-12 -1851-02-12 -1851-02-12 -1851-03-15 -1851-06-04 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-27 -1851-10-08 -1851-11-11 -1852-02-10 -1852-02-20 -1852-04-13 -1852-04-24 -1852-06-15 -1852-09-02 -1852-09-12 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-31 -1853-01-26 -1853-07-26 -1853-09-16 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-11-21 -1853-12-13 -1853-12-30 -1854-01-31 -1854-02-28 -1854-05-03 -1854-05-30 -1854-05-30 -1854-05-30 -1854-05-30 -1854-05-30 -1854-07-17 -1854-12-22 -1854-12-29 -1855-02-24 -1855-10-31 -1855-11-07 -1855-11-30 -1855-12-24 -1856-01-13 -1856-05-07 -1856-05-20 -1856-05-22 -1856-06-26 -1856-07-12 -1856-10-06 -1856-11-16 -1857-04-14 -1857-05-23 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-07-14 -1857-08-14 -1857-10-19 -1858-02-15 -1858-02-24 -1858-07-04 -1858-07-15 -1858-10-25 -1858-10-28 -1859-01-18 -1859-03-08 -1859-03-20 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 
-1859-03-26 -1859-04-10 -1859-05-23 -1859-08-31 -1859-09-17 -1859-09-17 -1859-09-17 -1859-11-21 -1859-12-31 -1860-03-10 -1860-03-12 -1860-05-15 -1860-08-22 -1860-09-19 -1860-12-03 -1861-04-23 -1861-08-14 -1861-12-06 -1861-12-19 -1862-01-12 -1862-03-01 -1862-03-20 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-06-26 -1863-02-22 -1863-06-17 -1863-09-12 -1863-12-27 -1863-12-29 -1864-02-24 -1864-07-19 -1864-08-12 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-12-04 -1865-12-17 -1866-01-04 -1866-01-10 -1866-03-29 -1866-04-07 -1866-04-14 -1866-09-03 -1866-10-21 -1867-05-11 -1867-06-21 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-09-03 -1867-09-26 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-27 -1867-11-10 -1867-11-12 -1867-11-12 -1867-11-12 -1867-11-12 -1867-11-12 -1867-11-25 -1869-01-21 -1869-01-27 -1869-02-18 -1869-03-09 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-12-03 -1870-01-17 -1870-07-27 -1870-08-21 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-21 -1870-10-08 -1870-11-19 -1871-01-22 -1871-01-27 -1871-02-14 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-03-31 -1871-07-29 -1871-08-01 -1871-08-03 -1871-08-17 -1872-02-16 -1872-05-28 -1872-06-06 -1872-07-26 -1872-10-09 -1873-02-27 -1873-05-29 -1873-07-02 -1873-07-12 -1873-07-15 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-27 -1873-07-30 -1873-09-06 -1873-09-06 -1873-09-06 -1873-09-06 -1873-12-23 -1874-01-02 -1874-04-04 -1874-04-14 -1874-07-09 -1874-07-21 -1874-09-24 -1874-10-24 -1874-11-26 -1874-12-16 -1875-03-28 -1875-04-24 -1875-05-12 -1875-07-18 -1875-08-16 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-11-02 -1875-11-27 -1876-08-16 -1876-10-18 -1876-12-15 -1876-12-28 -1877-03-02 -1877-03-06 -1877-03-19 -1877-06-22 -1877-07-17 -1877-09-01 -1877-10-04 -1878-01-07 -1878-02-02 -1878-04-11 -1878-04-30 -1878-06-26 -1878-10-17 -1878-10-21 -1878-11-17 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-03-10 -1879-07-23 -1879-09-16 -1879-12-02 -1880-05-30 +1847-04-04 +1847-04-21 +1847-07-25 +1847-08-22 +1848-02-29 +1849-03-07 +1849-03-30 +1849-04-15 +1849-05-21 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-08-27 +1849-09-10 +1850-01-20 +1850-03-18 +1850-04-07 +1850-08-29 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-12-30 +1851-02-11 +1851-02-11 +1851-02-11 +1851-02-11 +1851-02-11 +1851-03-14 +1851-06-03 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-26 +1851-10-07 +1851-11-10 +1852-02-09 +1852-02-19 +1852-04-12 +1852-04-23 +1852-06-14 +1852-09-01 +1852-09-11 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 
+1852-10-04 +1852-10-04 +1852-10-30 +1853-01-25 +1853-07-25 +1853-09-15 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-11-20 +1853-12-12 +1853-12-29 +1854-01-30 +1854-02-27 +1854-05-02 +1854-05-29 +1854-05-29 +1854-05-29 +1854-05-29 +1854-05-29 +1854-07-16 +1854-12-21 +1854-12-28 +1855-02-23 +1855-10-30 +1855-11-06 +1855-11-29 +1855-12-23 +1856-01-12 +1856-05-06 +1856-05-19 +1856-05-21 +1856-06-25 +1856-07-11 +1856-10-05 +1856-11-15 +1857-04-13 +1857-05-22 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-07-13 +1857-08-13 +1857-10-18 +1858-02-14 +1858-02-23 +1858-07-03 +1858-07-14 +1858-10-24 +1858-10-27 +1859-01-17 +1859-03-07 +1859-03-19 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-04-09 +1859-05-22 +1859-08-30 +1859-09-16 +1859-09-16 +1859-09-16 +1859-11-20 +1859-12-30 +1860-03-09 +1860-03-11 +1860-05-14 +1860-08-21 +1860-09-18 +1860-12-02 +1861-04-22 +1861-08-13 +1861-12-05 +1861-12-18 +1862-01-11 +1862-02-28 +1862-03-19 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-06-25 +1863-02-21 +1863-06-16 +1863-09-11 +1863-12-26 +1863-12-28 +1864-02-23 +1864-07-18 +1864-08-11 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-12-03 +1865-12-16 +1866-01-03 +1866-01-09 +1866-03-28 +1866-04-06 +1866-04-13 +1866-09-02 +1866-10-20 +1867-05-10 +1867-06-20 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-09-02 +1867-09-25 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-10-26 +1867-11-09 +1867-11-11 +1867-11-11 +1867-11-11 +1867-11-11 +1867-11-11 +1867-11-24 +1869-01-20 +1869-01-26 +1869-02-17 +1869-03-08 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-12-02 +1870-01-16 +1870-07-26 +1870-08-20 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-20 +1870-10-07 +1870-11-18 +1871-01-21 +1871-01-26 +1871-02-13 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-03-30 +1871-07-28 +1871-07-31 +1871-08-02 +1871-08-16 +1872-02-15 +1872-05-27 +1872-06-05 +1872-07-25 +1872-10-08 +1873-02-26 +1873-05-28 +1873-07-01 +1873-07-11 +1873-07-14 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-26 +1873-07-29 +1873-09-05 +1873-09-05 +1873-09-05 +1873-09-05 +1873-12-22 +1874-01-01 +1874-04-03 +1874-04-13 +1874-07-08 +1874-07-20 +1874-09-23 +1874-10-23 +1874-11-25 +1874-12-15 +1875-03-27 +1875-04-23 +1875-05-11 +1875-07-17 +1875-08-15 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-11-01 +1875-11-26 +1876-08-15 +1876-10-17 +1876-12-14 +1876-12-27 +1877-03-01 +1877-03-05 +1877-03-18 +1877-06-21 +1877-07-16 +1877-08-31 +1877-10-03 +1878-01-06 +1878-02-01 +1878-04-10 +1878-04-29 +1878-06-25 +1878-10-16 +1878-10-20 +1878-11-16 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 
+1879-02-04 +1879-03-09 +1879-07-22 +1879-09-15 +1879-12-01 +1880-05-29 +1880-06-29 1880-06-30 -1880-07-01 -1880-11-03 -1880-11-30 -1880-12-07 -1881-01-23 -1881-01-27 -1881-05-03 -1881-05-15 -1881-06-23 -1881-07-12 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-09-13 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-14 -1882-05-01 -1882-05-25 -1882-07-22 -1882-11-09 -1883-04-11 -1883-05-26 -1883-06-01 -1883-10-14 -1883-10-20 -1883-10-29 -1883-10-29 -1883-10-29 -1883-10-29 -1883-10-29 -1884-08-05 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-09-23 -1884-11-05 -1884-11-20 -1884-12-15 -1885-01-03 -1885-01-22 -1885-02-20 -1885-05-25 -1885-06-21 -1885-08-08 -1886-03-21 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-07 -1886-04-25 -1886-06-01 -1886-07-25 -1886-11-10 -1886-12-02 -1887-01-16 -1887-06-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-10-26 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-12-08 -1888-01-15 -1888-02-11 -1888-08-08 -1888-11-03 -1888-11-15 -1889-03-10 -1889-06-06 -1889-06-13 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-09-20 -1890-02-24 -1890-04-29 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-09-15 +1880-11-02 +1880-11-29 +1880-12-06 +1881-01-22 +1881-01-26 +1881-05-02 +1881-05-14 +1881-06-22 +1881-07-11 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-09-12 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-13 +1882-04-30 +1882-05-24 +1882-07-21 +1882-11-08 +1883-04-10 +1883-05-25 +1883-05-31 +1883-10-13 +1883-10-19 +1883-10-28 +1883-10-28 +1883-10-28 +1883-10-28 +1883-10-28 +1884-08-04 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-09-22 +1884-11-04 +1884-11-19 +1884-12-14 +1885-01-02 +1885-01-21 +1885-02-19 +1885-05-24 +1885-06-20 +1885-08-07 +1886-03-20 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-06 +1886-04-24 +1886-05-31 +1886-07-24 +1886-11-09 +1886-12-01 +1887-01-15 +1887-06-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-10-25 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-12-07 +1888-01-14 +1888-02-10 +1888-08-07 +1888-11-02 +1888-11-14 +1889-03-09 +1889-06-05 +1889-06-12 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-09-19 +1890-02-23 +1890-04-28 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-09-14 +1890-10-04 1890-10-05 -1890-10-06 -1890-11-10 -1891-01-10 -1891-01-13 -1891-06-20 -1891-07-13 -1892-03-26 -1892-05-13 -1892-09-10 -1893-01-18 -1893-07-10 +1890-11-09 +1891-01-09 +1891-01-12 +1891-06-19 +1891-07-12 +1892-03-25 +1892-05-12 +1892-09-09 +1893-01-17 +1893-07-09 +1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 
-1893-07-19 -1893-07-19 -1893-07-20 -1894-06-05 -1894-06-18 -1894-10-01 -1894-10-10 -1894-11-28 -1895-01-20 -1895-02-07 -1895-09-04 -1895-11-01 -1895-12-13 -1895-12-31 -1896-01-02 -1896-01-13 -1896-01-26 -1896-02-26 -1896-03-09 -1896-05-05 -1896-05-10 -1896-08-08 -1896-08-14 -1896-08-25 -1897-02-21 -1897-06-09 -1897-06-12 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-11-03 -1897-12-01 -1898-02-23 -1898-02-27 -1898-03-06 -1898-04-13 -1898-05-22 -1898-06-21 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1899-01-30 -1899-02-16 -1899-03-01 -1899-06-24 -1899-08-27 -1899-10-20 -1900-04-27 -1900-09-17 -1900-10-19 -1901-02-10 -1901-04-13 -1901-05-12 -1901-05-12 -1901-05-12 -1901-05-12 -1901-05-29 -1901-10-05 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1902-01-24 -1902-05-14 -1902-06-14 -1902-07-29 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-17 -1903-03-20 -1903-04-23 -1903-05-12 -1904-05-24 -1904-06-30 -1904-08-20 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-10-14 -1904-11-30 -1905-03-27 -1905-06-07 -1905-06-15 -1905-07-20 -1905-09-05 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-23 -1905-12-15 -1906-01-11 -1906-07-25 -1906-08-27 -1906-09-02 -1906-11-02 -1906-12-13 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1908-01-24 -1908-03-28 -1908-05-03 -1908-05-28 -1908-06-27 -1908-06-29 -1908-12-19 -1909-11-21 -1910-01-23 -1910-02-16 -1910-03-05 -1910-03-15 -1910-04-06 -1910-05-12 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-06-18 -1910-08-17 -1910-11-06 -1911-05-05 -1911-06-22 -1911-11-19 -1912-04-14 -1912-05-02 -1912-06-12 -1913-01-30 -1913-02-12 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-07-14 -1913-09-26 -1913-09-26 -1913-09-26 -1913-10-24 -1913-12-15 -1914-02-18 -1914-08-19 -1915-02-05 -1915-03-05 -1915-08-10 -1915-08-15 -1915-11-05 -1915-12-12 -1915-12-18 -1916-05-05 -1916-05-12 -1916-06-07 -1916-06-11 +1894-06-04 +1894-06-17 +1894-09-30 +1894-10-09 +1894-11-27 +1895-01-19 +1895-02-06 +1895-09-03 +1895-10-31 +1895-12-12 +1895-12-30 +1896-01-01 +1896-01-12 +1896-01-25 +1896-02-25 +1896-03-08 +1896-05-04 +1896-05-09 +1896-08-07 +1896-08-13 +1896-08-24 +1897-02-20 +1897-06-08 +1897-06-11 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-11-02 +1897-11-30 +1898-02-22 +1898-02-26 +1898-03-05 +1898-04-12 +1898-05-21 +1898-06-20 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1899-01-29 +1899-02-15 +1899-02-28 +1899-06-23 +1899-08-26 +1899-10-19 +1900-04-26 +1900-09-16 +1900-10-18 +1901-02-09 +1901-04-12 +1901-05-11 +1901-05-11 +1901-05-11 +1901-05-11 +1901-05-28 +1901-10-04 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 
+1902-01-23 +1902-05-13 +1902-06-13 +1902-07-28 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-16 +1903-03-19 +1903-04-22 +1903-05-11 +1904-05-23 +1904-06-29 +1904-08-19 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-10-13 +1904-11-29 +1905-03-26 +1905-06-06 +1905-06-14 +1905-07-19 +1905-09-04 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-22 +1905-12-14 +1906-01-10 +1906-07-24 +1906-08-26 +1906-09-01 +1906-11-01 +1906-12-12 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1908-01-23 +1908-03-27 +1908-05-02 +1908-05-27 +1908-06-26 +1908-06-28 +1908-12-18 +1909-11-20 +1910-01-22 +1910-02-15 +1910-03-04 +1910-03-14 +1910-04-05 +1910-05-11 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-06-17 +1910-08-16 +1910-11-05 +1911-05-04 +1911-06-21 +1911-11-18 +1912-04-13 +1912-05-01 +1912-06-11 +1913-01-29 +1913-02-11 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-07-13 +1913-09-25 +1913-09-25 +1913-09-25 +1913-10-23 +1913-12-14 +1914-02-17 +1914-08-18 +1915-02-04 +1915-03-04 +1915-08-09 +1915-08-14 +1915-11-04 +1915-12-11 +1915-12-17 +1916-05-04 +1916-05-11 +1916-06-06 +1916-06-10 +1916-08-08 1916-08-09 -1916-08-10 -1917-04-16 -1917-06-28 -1917-12-07 -1918-02-10 -1918-02-10 -1918-02-10 -1918-02-10 -1918-02-10 -1918-02-10 -1918-09-11 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1919-02-07 -1919-04-07 -1919-06-26 -1919-08-22 -1919-10-04 -1919-10-04 -1919-10-21 -1920-01-05 -1920-05-06 -1920-06-30 -1920-08-04 -1920-08-18 -1920-10-19 -1921-02-19 -1921-03-14 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-06-02 -1921-07-03 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-11-04 -1921-11-04 -1921-11-04 -1921-11-04 -1921-11-04 -1921-11-04 -1922-01-15 -1922-04-20 -1922-06-22 -1922-07-22 -1923-01-24 -1923-03-08 -1923-03-24 -1923-05-29 -1923-08-12 -1923-08-31 -1923-09-20 -1923-11-15 -1923-12-16 -1924-01-26 -1924-03-16 -1924-05-06 -1924-06-23 -1924-07-04 -1924-11-22 -1924-12-10 -1925-02-17 -1925-06-05 -1925-09-05 -1925-09-08 -1925-10-23 -1925-12-30 -1926-03-30 -1926-04-10 -1926-05-27 -1926-09-07 -1926-12-07 -1927-02-11 -1927-03-27 -1927-04-05 -1927-04-05 -1927-04-05 -1927-04-05 -1927-04-05 -1927-04-05 -1927-05-16 -1927-07-16 -1927-07-26 -1927-08-26 -1927-09-03 -1927-11-01 -1927-11-28 -1928-01-10 -1928-02-25 -1928-05-11 -1928-07-29 -1928-08-27 -1929-03-07 -1929-04-01 -1929-04-05 -1929-05-29 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1930-02-01 -1930-04-09 -1930-04-09 -1930-04-09 -1930-04-09 -1930-06-11 -1930-07-07 -1930-09-28 -1930-12-21 -1931-04-08 -1931-07-01 -1931-08-30 -1931-10-31 -1931-10-31 -1931-12-12 -1932-03-15 -1932-03-30 -1932-04-21 -1932-05-31 -1932-10-27 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-08-27 -1933-08-30 -1933-11-30 -1933-12-22 -1934-06-02 -1934-08-13 -1934-09-12 -1934-11-10 -1934-11-10 -1934-11-10 -1934-11-10 -1934-11-10 -1934-11-10 -1935-04-11 -1936-01-31 -1936-06-23 -1936-07-04 -1936-07-04 -1936-07-04 
-1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-11-04 -1937-01-22 -1937-02-04 +1917-04-15 +1917-06-27 +1917-12-06 +1918-02-09 +1918-02-09 +1918-02-09 +1918-02-09 +1918-02-09 +1918-02-09 +1918-09-10 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1919-02-06 +1919-04-06 +1919-06-25 +1919-08-21 +1919-10-03 +1919-10-03 +1919-10-20 +1920-01-04 +1920-05-05 +1920-06-29 +1920-08-03 +1920-08-17 +1920-10-18 +1921-02-18 +1921-03-13 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-06-01 +1921-07-02 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-11-03 +1921-11-03 +1921-11-03 +1921-11-03 +1921-11-03 +1921-11-03 +1922-01-14 +1922-04-19 +1922-06-21 +1922-07-21 +1923-01-23 +1923-03-07 +1923-03-23 +1923-05-28 +1923-08-11 +1923-08-30 +1923-09-19 +1923-11-14 +1923-12-15 +1924-01-25 +1924-03-15 +1924-05-05 +1924-06-22 +1924-07-03 +1924-11-21 +1924-12-09 +1925-02-16 +1925-06-04 +1925-09-04 +1925-09-07 +1925-10-22 +1925-12-29 +1926-03-29 +1926-04-09 +1926-05-26 +1926-09-06 +1926-12-06 +1927-02-10 +1927-03-26 +1927-04-04 +1927-04-04 +1927-04-04 +1927-04-04 +1927-04-04 +1927-04-04 +1927-05-15 +1927-07-15 +1927-07-25 +1927-08-25 +1927-09-02 +1927-10-31 +1927-11-27 +1928-01-09 +1928-02-24 +1928-05-10 +1928-07-28 +1928-08-26 +1929-03-06 +1929-03-31 +1929-04-04 +1929-05-28 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1930-01-31 +1930-04-08 +1930-04-08 +1930-04-08 +1930-04-08 +1930-06-10 +1930-07-06 +1930-09-27 +1930-12-20 +1931-04-07 +1931-06-30 +1931-08-29 +1931-10-30 +1931-10-30 +1931-12-11 +1932-03-14 +1932-03-29 +1932-04-20 +1932-05-30 +1932-10-26 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-08-26 +1933-08-29 +1933-11-29 +1933-12-21 +1934-06-01 +1934-08-12 +1934-09-11 +1934-11-09 +1934-11-09 +1934-11-09 +1934-11-09 +1934-11-09 +1934-11-09 +1935-04-10 +1936-01-30 +1936-06-22 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-11-03 +1937-01-21 +1937-02-03 +1937-05-29 1937-05-30 -1937-05-31 -1937-06-26 -1937-09-08 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-17 -1938-01-27 +1937-06-25 +1937-09-07 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-16 +1938-01-26 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-26 -1938-05-17 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-10-08 -1939-01-24 -1939-02-19 -1939-03-23 -1939-05-05 -1939-07-13 -1939-08-04 +1938-05-16 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-10-07 +1939-01-23 +1939-02-18 +1939-03-22 +1939-05-04 +1939-07-12 +1939-08-03 +1940-02-09 1940-02-10 -1940-02-11 -1940-03-27 -1940-04-28 -1940-05-01 -1940-08-07 -1940-08-18 -1940-09-24 -1941-03-24 -1941-04-19 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-11-07 -1942-02-16 -1942-03-23 -1943-01-31 
-1943-03-30 -1943-05-08 -1943-11-28 -1944-02-28 -1944-05-06 -1945-09-26 -1945-10-08 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1946-01-18 -1946-02-02 -1946-02-04 -1946-04-29 -1946-05-11 -1947-01-05 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-03-03 -1947-03-26 -1947-05-17 -1947-06-25 -1947-10-11 -1947-12-28 -1948-01-27 -1948-05-20 -1948-06-17 -1948-12-27 -1949-06-26 -1949-09-05 -1949-09-20 -1950-01-23 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-23 -1950-04-10 -1950-09-27 -1951-03-31 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-09-06 -1952-02-06 -1952-02-13 -1952-06-23 -1953-01-10 -1953-02-10 -1953-02-23 -1953-02-23 -1953-02-23 -1953-02-23 -1953-02-23 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-05-19 -1953-07-23 -1953-11-27 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-25 -1954-02-21 -1954-05-19 -1954-07-04 -1954-07-07 -1954-10-17 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-12-25 -1955-05-10 -1955-07-22 -1955-08-07 -1955-08-24 -1955-09-23 -1955-11-23 -1956-05-18 -1956-08-06 -1956-10-09 -1957-04-20 -1957-05-03 -1957-08-15 -1957-08-24 -1957-09-05 -1958-07-25 -1958-07-25 -1958-07-25 -1958-07-25 -1958-07-25 -1958-10-13 -1958-10-24 -1959-01-13 -1959-01-24 -1959-03-22 -1959-04-30 -1959-09-08 -1959-09-23 -1959-11-23 -1959-12-21 -1960-01-16 -1960-03-18 -1960-04-05 -1960-07-17 -1960-07-25 -1960-08-30 -1960-11-25 -1961-05-15 -1961-07-10 -1961-07-29 -1961-07-29 -1961-09-25 -1961-10-15 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1962-06-07 -1962-07-08 -1962-08-18 -1962-09-02 -1963-01-08 -1963-03-31 +1940-03-26 +1940-04-27 +1940-04-30 +1940-08-06 +1940-08-17 +1940-09-23 +1941-03-23 +1941-04-18 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-11-06 +1942-02-15 +1942-03-22 +1943-01-30 +1943-03-29 +1943-05-07 +1943-11-27 +1944-02-27 +1944-05-05 +1945-09-25 +1945-10-07 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1946-01-17 +1946-02-01 +1946-02-03 +1946-04-28 +1946-05-10 +1947-01-04 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-03-02 +1947-03-25 +1947-05-16 +1947-06-24 +1947-10-10 +1947-12-27 +1948-01-26 +1948-05-19 +1948-06-16 +1948-12-26 +1949-06-25 +1949-09-04 +1949-09-19 +1950-01-22 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-22 +1950-04-09 +1950-09-26 +1951-03-30 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-09-05 +1952-02-05 +1952-02-12 +1952-06-22 +1953-01-09 +1953-02-09 +1953-02-22 +1953-02-22 +1953-02-22 +1953-02-22 +1953-02-22 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-05-18 +1953-07-22 +1953-11-26 +1953-12-03 +1953-12-03 +1953-12-03 +1953-12-03 
+1953-12-03 +1953-12-03 +1953-12-03 +1953-12-24 +1954-02-20 +1954-05-18 +1954-07-03 +1954-07-06 +1954-10-16 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-12-24 +1955-05-09 +1955-07-21 +1955-08-06 +1955-08-23 +1955-09-22 +1955-11-22 +1956-05-17 +1956-08-05 +1956-10-08 +1957-04-19 +1957-05-02 +1957-08-14 +1957-08-23 +1957-09-04 +1958-07-24 +1958-07-24 +1958-07-24 +1958-07-24 +1958-07-24 +1958-10-12 +1958-10-23 +1959-01-12 +1959-01-23 +1959-03-21 +1959-04-29 +1959-09-07 +1959-09-22 +1959-11-22 +1959-12-20 +1960-01-15 +1960-03-17 +1960-04-04 +1960-07-16 +1960-07-24 +1960-08-29 +1960-11-24 +1961-05-14 +1961-07-09 +1961-07-28 +1961-07-28 +1961-09-24 +1961-10-14 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1962-06-06 +1962-07-07 +1962-08-17 +1962-09-01 +1963-01-07 +1963-03-30 +1964-04-14 1964-04-15 -1964-04-16 -1964-10-20 -1964-10-30 -1964-11-08 -1964-12-14 -1965-03-20 -1965-03-27 -1965-05-04 -1965-06-01 -1965-09-19 -1966-02-15 -1966-08-17 -1966-12-01 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1967-03-22 -1967-04-09 -1967-08-09 -1967-09-05 -1968-02-16 +1964-10-19 +1964-10-29 +1964-11-07 +1964-12-13 +1965-03-19 +1965-03-26 +1965-05-03 +1965-05-31 +1965-09-18 +1966-02-14 +1966-08-16 +1966-11-30 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1967-03-21 +1967-04-08 +1967-08-08 +1967-09-04 +1968-02-15 1971-09-22 1971-09-25 1972-02-25 Index: ql/src/test/results/clientpositive/tez/vector_char_2.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vector_char_2.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/vector_char_2.q.out (working copy) @@ -77,12 +77,12 @@ alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: char(20)), key (type: char(10)) - outputColumnNames: value, key + expressions: value (type: char(20)), UDFToInteger(key) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(UDFToInteger(key)), count() - keys: value (type: char(20)) + aggregations: sum(_col1), count() + keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE @@ -101,15 +101,11 @@ mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(20)) - sort order: + - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: @@ -209,12 +205,12 @@ alias: char_2 Statistics: Num rows: 
500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: char(20)), key (type: char(10)) - outputColumnNames: value, key + expressions: value (type: char(20)), UDFToInteger(key) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(UDFToInteger(key)), count() - keys: value (type: char(20)) + aggregations: sum(_col1), count() + keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE @@ -233,15 +229,11 @@ mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(20)) - sort order: - - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: Index: ql/src/test/results/clientpositive/tez/vector_date_1.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vector_date_1.q.out (revision 0) +++ ql/src/test/results/clientpositive/tez/vector_date_1.q.out (working copy) @@ -0,0 +1,719 @@ +PREHOOK: query: drop table if exists vector_date_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vector_date_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vector_date_1 (dt1 date, dt2 date) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_date_1 +POSTHOOK: query: create table vector_date_1 (dt1 date, dt2 date) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_date_1 +PREHOOK: query: insert into table vector_date_1 + select null, null from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_date_1 +POSTHOOK: query: insert into table vector_date_1 + select null, null from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_date_1 +POSTHOOK: Lineage: vector_date_1.dt1 EXPRESSION [] +POSTHOOK: Lineage: vector_date_1.dt2 EXPRESSION [] +PREHOOK: query: insert into table vector_date_1 + select date '1999-12-31', date '2000-01-01' from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_date_1 +POSTHOOK: query: insert into table vector_date_1 + select date '1999-12-31', date '2000-01-01' from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_date_1 +POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] +POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] +PREHOOK: query: insert into table vector_date_1 + select date '2001-01-01', date '2001-06-01' from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_date_1 +POSTHOOK: query: insert into table vector_date_1 + select date 
'2001-01-01', date '2001-06-01' from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_date_1 +POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] +POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] +PREHOOK: query: -- column-to-column comparison in select clause +explain +select + dt1, dt2, + -- should be all true + dt1 = dt1, + dt1 != dt2, + dt1 <= dt1, + dt1 <= dt2, + dt1 < dt2, + dt2 >= dt2, + dt2 >= dt1, + dt2 > dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +POSTHOOK: query: -- column-to-column comparison in select clause +explain +select + dt1, dt2, + -- should be all true + dt1 = dt1, + dt1 != dt2, + dt1 <= dt1, + dt1 <= dt2, + dt1 < dt2, + dt2 >= dt2, + dt2 >= dt1, + dt2 > dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt1 (type: date), dt2 (type: date), (dt1 = dt1) (type: boolean), (dt1 <> dt2) (type: boolean), (dt1 <= dt1) (type: boolean), (dt1 <= dt2) (type: boolean), (dt1 < dt2) (type: boolean), (dt2 >= dt2) (type: boolean), (dt2 >= dt1) (type: boolean), (dt2 > dt1) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, dt2, + -- should be all true + dt1 = dt1, + dt1 != dt2, + dt1 <= dt1, + dt1 <= dt2, + dt1 < dt2, + dt2 >= dt2, + dt2 >= dt1, + dt2 > dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt1, dt2, + -- should be all true + dt1 = dt1, + dt1 != dt2, + dt1 <= dt1, + dt1 <= dt2, + dt1 < dt2, + dt2 >= dt2, + dt2 >= dt1, + dt2 > dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1999-12-31 2000-01-01 true true true true true true true true +2001-01-01 2001-06-01 true true true true true true true true +PREHOOK: query: explain +select + dt1, dt2, + -- should be all false + dt1 != dt1, + dt1 = dt2, + dt1 < dt1, + dt1 >= dt2, + dt1 > dt2, + dt2 > dt2, + dt2 <= dt1, + dt2 < dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + dt1, dt2, + -- should be all false + dt1 != dt1, + dt1 = dt2, + dt1 < dt1, + dt1 >= dt2, + dt1 > dt2, + dt2 > dt2, + dt2 <= dt1, + dt2 < dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt1 (type: date), dt2 (type: date), (dt1 <> dt1) (type: boolean), (dt1 = dt2) (type: boolean), (dt1 < dt1) (type: boolean), (dt1 >= dt2) (type: boolean), (dt1 > dt2) (type: boolean), (dt2 > dt2) (type: boolean), (dt2 <= dt1) (type: boolean), (dt2 < dt1) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, dt2, + -- should be all false + dt1 != dt1, + dt1 = dt2, + dt1 < dt1, + dt1 >= dt2, + dt1 > dt2, + dt2 > dt2, + dt2 <= dt1, + dt2 < dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt1, dt2, + -- should be all false + dt1 != dt1, + dt1 = dt2, + dt1 < dt1, + dt1 >= dt2, + dt1 > dt2, + dt2 > dt2, + dt2 <= dt1, + dt2 < dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +NULL NULL NULL NULL 
NULL NULL NULL NULL NULL NULL +1999-12-31 2000-01-01 false false false false false false false false +2001-01-01 2001-06-01 false false false false false false false false +PREHOOK: query: -- column-to-literal/literal-to-column comparison in select clause +explain +select + dt1, + -- should be all true + dt1 != date '1970-01-01', + dt1 >= date '1970-01-01', + dt1 > date '1970-01-01', + dt1 <= date '2100-01-01', + dt1 < date '2100-01-01', + date '1970-01-01' != dt1, + date '1970-01-01' <= dt1, + date '1970-01-01' < dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +POSTHOOK: query: -- column-to-literal/literal-to-column comparison in select clause +explain +select + dt1, + -- should be all true + dt1 != date '1970-01-01', + dt1 >= date '1970-01-01', + dt1 > date '1970-01-01', + dt1 <= date '2100-01-01', + dt1 < date '2100-01-01', + date '1970-01-01' != dt1, + date '1970-01-01' <= dt1, + date '1970-01-01' < dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt1 (type: date), (dt1 <> 1970-01-01) (type: boolean), (dt1 >= 1970-01-01) (type: boolean), (dt1 > 1970-01-01) (type: boolean), (dt1 <= 2100-01-01) (type: boolean), (dt1 < 2100-01-01) (type: boolean), (1970-01-01 <> dt1) (type: boolean), (1970-01-01 <= dt1) (type: boolean), (1970-01-01 < dt1) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, + -- should be all true + dt1 != date '1970-01-01', + dt1 >= date '1970-01-01', + dt1 > date '1970-01-01', + dt1 <= date '2100-01-01', + dt1 < date '2100-01-01', + date '1970-01-01' != dt1, + date '1970-01-01' <= dt1, + date '1970-01-01' < dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 
+#### A masked pattern was here #### +POSTHOOK: query: select + dt1, + -- should be all true + dt1 != date '1970-01-01', + dt1 >= date '1970-01-01', + dt1 > date '1970-01-01', + dt1 <= date '2100-01-01', + dt1 < date '2100-01-01', + date '1970-01-01' != dt1, + date '1970-01-01' <= dt1, + date '1970-01-01' < dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL +1999-12-31 true true true true true true true true +2001-01-01 true true true true true true true true +PREHOOK: query: explain +select + dt1, + -- should all be false + dt1 = date '1970-01-01', + dt1 <= date '1970-01-01', + dt1 < date '1970-01-01', + dt1 >= date '2100-01-01', + dt1 > date '2100-01-01', + date '1970-01-01' = dt1, + date '1970-01-01' >= dt1, + date '1970-01-01' > dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + dt1, + -- should all be false + dt1 = date '1970-01-01', + dt1 <= date '1970-01-01', + dt1 < date '1970-01-01', + dt1 >= date '2100-01-01', + dt1 > date '2100-01-01', + date '1970-01-01' = dt1, + date '1970-01-01' >= dt1, + date '1970-01-01' > dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt1 (type: date), (dt1 = 1970-01-01) (type: boolean), (dt1 <= 1970-01-01) (type: boolean), (dt1 < 1970-01-01) (type: boolean), (dt1 >= 2100-01-01) (type: boolean), (dt1 > 2100-01-01) (type: boolean), (1970-01-01 = dt1) (type: boolean), (1970-01-01 >= dt1) (type: boolean), (1970-01-01 > dt1) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, + -- should all be false + dt1 = date '1970-01-01', + 
dt1 <= date '1970-01-01', + dt1 < date '1970-01-01', + dt1 >= date '2100-01-01', + dt1 > date '2100-01-01', + date '1970-01-01' = dt1, + date '1970-01-01' >= dt1, + date '1970-01-01' > dt1 +from vector_date_1 order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt1, + -- should all be false + dt1 = date '1970-01-01', + dt1 <= date '1970-01-01', + dt1 < date '1970-01-01', + dt1 >= date '2100-01-01', + dt1 > date '2100-01-01', + date '1970-01-01' = dt1, + date '1970-01-01' >= dt1, + date '1970-01-01' > dt1 +from vector_date_1 order by dt1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL +1999-12-31 false false false false false false false false +2001-01-01 false false false false false false false false +PREHOOK: query: -- column-to-column comparisons in predicate +-- all rows with non-null dt1 should be returned +explain +select + dt1, dt2 +from vector_date_1 +where + dt1 = dt1 + and dt1 != dt2 + and dt1 < dt2 + and dt1 <= dt2 + and dt2 > dt1 + and dt2 >= dt1 +order by dt1 +PREHOOK: type: QUERY +POSTHOOK: query: -- column-to-column comparisons in predicate +-- all rows with non-null dt1 should be returned +explain +select + dt1, dt2 +from vector_date_1 +where + dt1 = dt1 + and dt1 != dt2 + and dt1 < dt2 + and dt1 <= dt2 + and dt2 > dt1 + and dt2 >= dt1 +order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((dt1 = dt1) and (dt1 <> dt2)) and (dt1 < dt2)) and (dt1 <= dt2)) and (dt2 > dt1)) and (dt2 >= dt1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: dt1 (type: date), dt2 (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, dt2 +from vector_date_1 +where + dt1 = dt1 + and dt1 != dt2 + and dt1 < dt2 + and dt1 <= dt2 + and dt2 > dt1 + and dt2 >= dt1 +order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt1, dt2 +from vector_date_1 +where + dt1 = dt1 + and dt1 != dt2 + and dt1 < dt2 + and dt1 <= dt2 + and dt2 
> dt1 + and dt2 >= dt1 +order by dt1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +1999-12-31 2000-01-01 +2001-01-01 2001-06-01 +PREHOOK: query: -- column-to-literal/literal-to-column comparison in predicate +-- only a single row should be returned +explain +select + dt1, dt2 +from vector_date_1 +where + dt1 = date '2001-01-01' + and date '2001-01-01' = dt1 + and dt1 != date '1970-01-01' + and date '1970-01-01' != dt1 + and dt1 > date '1970-01-01' + and dt1 >= date '1970-01-01' + and date '1970-01-01' < dt1 + and date '1970-01-01' <= dt1 +order by dt1 +PREHOOK: type: QUERY +POSTHOOK: query: -- column-to-literal/literal-to-column comparison in predicate +-- only a single row should be returned +explain +select + dt1, dt2 +from vector_date_1 +where + dt1 = date '2001-01-01' + and date '2001-01-01' = dt1 + and dt1 != date '1970-01-01' + and date '1970-01-01' != dt1 + and dt1 > date '1970-01-01' + and dt1 >= date '1970-01-01' + and date '1970-01-01' < dt1 + and date '1970-01-01' <= dt1 +order by dt1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_date_1 + Statistics: Num rows: 3 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((dt1 = 2001-01-01) and (2001-01-01 = dt1)) and (dt1 <> 1970-01-01)) and (1970-01-01 <> dt1)) and (dt1 > 1970-01-01)) and (dt1 >= 1970-01-01)) and (1970-01-01 < dt1)) and (1970-01-01 <= dt1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: dt2 (type: date) + outputColumnNames: _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: 2001-01-01 (type: date) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: 2001-01-01 (type: date), VALUE._col0 (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt1, dt2 +from vector_date_1 +where + dt1 = date '2001-01-01' + and date '2001-01-01' = dt1 + and dt1 != date '1970-01-01' + and date '1970-01-01' != dt1 + and dt1 > date '1970-01-01' + and dt1 >= date '1970-01-01' + and date '1970-01-01' < dt1 + and date '1970-01-01' <= dt1 +order by dt1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt1, dt2 +from vector_date_1 +where + dt1 = date '2001-01-01' + and date '2001-01-01' = dt1 + and dt1 != date '1970-01-01' + and date '1970-01-01' != dt1 + and dt1 > date '1970-01-01' + and dt1 >= date '1970-01-01' + and date '1970-01-01' < dt1 + and date '1970-01-01' <= dt1 +order by dt1 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +2001-01-01 2001-06-01 +PREHOOK: query: drop table vector_date_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@vector_date_1 +PREHOOK: Output: default@vector_date_1 +POSTHOOK: query: drop table vector_date_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@vector_date_1 +POSTHOOK: Output: default@vector_date_1 Index: ql/src/test/results/clientpositive/tez/vector_decimal_round.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vector_decimal_round.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/vector_decimal_round.q.out (working copy) @@ -114,18 +114,18 @@ alias: decimal_tbl_txt Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) - outputColumnNames: _col0, _col1 + expressions: dec (type: decimal(10,0)) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, (- 1)) (type: decimal(11,0)) sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,0)), _col1 (type: decimal(11,0)) + value expressions: _col0 (type: decimal(10,0)) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(10,0)), VALUE._col1 (type: decimal(11,0)) + expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -264,18 +264,18 @@ alias: decimal_tbl_rc Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) - outputColumnNames: _col0, _col1 + expressions: dec (type: decimal(10,0)) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, (- 1)) (type: decimal(11,0)) sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,0)), _col1 (type: decimal(11,0)) + value expressions: _col0 (type: decimal(10,0)) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(10,0)), VALUE._col1 (type: decimal(11,0)) + expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -415,19 +415,19 @@ alias: decimal_tbl_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) - outputColumnNames: _col0, _col1 + expressions: dec (type: decimal(10,0)) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, (- 1)) (type: decimal(11,0)) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,0)), _col1 (type: decimal(11,0)) + value expressions: _col0 (type: decimal(10,0)) Execution mode: 
vectorized Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(10,0)), VALUE._col1 (type: decimal(11,0)) + expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/vector_decimal_round_2.q.out (working copy) @@ -121,7 +121,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_1_orc #### A masked pattern was here #### -55555 55555 55555.0 55555.00 55555.000 55560 55600 56000 60000 100000 0 0 0 +55555 55555 55555 55555 55555 55560 55600 56000 60000 100000 0 0 0 PREHOOK: query: create table decimal_tbl_2_orc (pos decimal(38,18), neg decimal(38,18)) STORED AS ORC PREHOOK: type: CREATETABLE @@ -240,7 +240,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_2_orc #### A masked pattern was here #### -125 125 125.3 125.32 125.315 125.3150 130 100 0 0 -125 -125 -125.3 -125.32 -125.315 -125.3150 -130 -100 0 0 +125 125 125.3 125.32 125.315 125.315 130 100 0 0 -125 -125 -125.3 -125.32 -125.315 -125.315 -130 -100 0 0 PREHOOK: query: create table decimal_tbl_3_orc (dec decimal(38,18)) STORED AS ORC PREHOOK: type: CREATETABLE @@ -402,7 +402,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_tbl_3_orc #### A masked pattern was here #### -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3.1 3.14 3.142 3.1416 3.14159 3.141593 3.1415927 3.14159265 3.141592654 3.1415926536 3.14159265359 3.141592653590 3.1415926535898 3.1415926535898 3.14159265358979 3.141592653589793 3.1415926535897930 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3.1 3.14 3.142 3.1416 3.14159 3.141593 3.1415927 3.14159265 3.141592654 3.1415926536 3.14159265359 3.14159265359 3.1415926535898 3.1415926535898 3.14159265358979 3.141592653589793 3.141592653589793 PREHOOK: query: create table decimal_tbl_4_orc (pos decimal(38,18), neg decimal(38,18)) STORED AS ORC PREHOOK: type: CREATETABLE @@ -457,7 +457,7 @@ alias: decimal_tbl_4_orc Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: round(pos, 9) (type: decimal(30,9)), round(neg, 9) (type: decimal(30,9)), round(1809242.3151111344, 9) (type: decimal(17,9)), round((- 1809242.3151111344), 9) (type: decimal(17,9)) + expressions: round(pos, 9) (type: decimal(30,9)), round(neg, 9) (type: decimal(30,9)), 1809242.315111134 (type: decimal(17,9)), -1809242.315111134 (type: decimal(17,9)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -465,6 +465,7 @@ sort order: + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(30,9)), _col2 (type: decimal(17,9)), _col3 (type: decimal(17,9)) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Select Operator Index: ql/src/test/results/clientpositive/tez/vector_if_expr.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vector_if_expr.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/tez/vector_if_expr.q.out (working copy) @@ -19,18 +19,18 @@ Map Operator Tree: TableScan alias: 
alltypesorc - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cboolean1 is not null and cboolean1) (type: boolean) - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cboolean1 (type: boolean), if(cboolean1, 'first', 'second') (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 @@ -38,10 +38,10 @@ Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Index: ql/src/test/results/clientpositive/tez/vector_interval_1.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vector_interval_1.q.out (revision 0) +++ ql/src/test/results/clientpositive/tez/vector_interval_1.q.out (working copy) @@ -0,0 +1,822 @@ +PREHOOK: query: drop table if exists vector_interval_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vector_interval_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vector_interval_1 (ts timestamp, dt date, str1 string, str2 string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_interval_1 +POSTHOOK: query: create table vector_interval_1 (ts timestamp, dt date, str1 string, str2 string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_interval_1 +PREHOOK: query: insert into vector_interval_1 + select timestamp '2001-01-01 01:02:03', date '2001-01-01', '1-2', '1 2:3:4' from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_interval_1 +POSTHOOK: query: insert into vector_interval_1 + select timestamp '2001-01-01 01:02:03', date '2001-01-01', '1-2', '1 2:3:4' from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_interval_1 +POSTHOOK: Lineage: vector_interval_1.dt SIMPLE [] +POSTHOOK: Lineage: vector_interval_1.str1 SIMPLE [] +POSTHOOK: Lineage: vector_interval_1.str2 SIMPLE [] +POSTHOOK: Lineage: vector_interval_1.ts SIMPLE [] +PREHOOK: query: insert into vector_interval_1 + select null, null, null, null from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: 
default@vector_interval_1 +POSTHOOK: query: insert into vector_interval_1 + select null, null, null, null from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_interval_1 +POSTHOOK: Lineage: vector_interval_1.dt EXPRESSION [] +POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION [] +PREHOOK: query: -- constants/cast from string +explain +select + str1, + interval '1-2' year to month, interval_year_month(str1), + interval '1 2:3:4' day to second, interval_day_time(str2) +from vector_interval_1 order by str1 +PREHOOK: type: QUERY +POSTHOOK: query: -- constants/cast from string +explain +select + str1, + interval '1-2' year to month, interval_year_month(str1), + interval '1 2:3:4' day to second, interval_day_time(str2) +from vector_interval_1 order by str1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str1 (type: string), 1-2 (type: interval_year_month), CAST( str1 AS INTERVAL YEAR TO MONTH) (type: interval_year_month), 1 02:03:04.000000000 (type: interval_day_time), CAST( str2 AS INTERVAL DAY TO SECOND) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: interval_year_month), _col2 (type: interval_year_month), _col3 (type: interval_day_time), _col4 (type: interval_day_time) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + str1, + interval '1-2' year to month, interval_year_month(str1), + interval '1 2:3:4' day to second, interval_day_time(str2) +from vector_interval_1 order by str1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + str1, + interval '1-2' year to month, interval_year_month(str1), + interval '1 2:3:4' day to second, interval_day_time(str2) +from vector_interval_1 order by str1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL 1-2 NULL 1 02:03:04.000000000 NULL +1-2 1-2 1-2 1 
02:03:04.000000000 1 02:03:04.000000000 +PREHOOK: query: -- interval arithmetic +explain +select + dt, + interval '1-2' year to month + interval '1-2' year to month, + interval_year_month(str1) + interval_year_month(str1), + interval '1-2' year to month + interval_year_month(str1), + interval '1-2' year to month - interval '1-2' year to month, + interval_year_month(str1) - interval_year_month(str1), + interval '1-2' year to month - interval_year_month(str1) +from vector_interval_1 order by dt +PREHOOK: type: QUERY +POSTHOOK: query: -- interval arithmetic +explain +select + dt, + interval '1-2' year to month + interval '1-2' year to month, + interval_year_month(str1) + interval_year_month(str1), + interval '1-2' year to month + interval_year_month(str1), + interval '1-2' year to month - interval '1-2' year to month, + interval_year_month(str1) - interval_year_month(str1), + interval '1-2' year to month - interval_year_month(str1) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt (type: date), 2-4 (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), 0-0 (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: interval_year_month), _col2 (type: interval_year_month), _col3 (type: interval_year_month), _col4 (type: interval_year_month), _col5 (type: interval_year_month), _col6 (type: interval_year_month) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), VALUE._col2 (type: interval_year_month), VALUE._col3 (type: interval_year_month), VALUE._col4 (type: interval_year_month), VALUE._col5 (type: interval_year_month) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt, + interval '1-2' year to month + interval '1-2' year to month, + interval_year_month(str1) + interval_year_month(str1), + interval '1-2' year to month + 
interval_year_month(str1), + interval '1-2' year to month - interval '1-2' year to month, + interval_year_month(str1) - interval_year_month(str1), + interval '1-2' year to month - interval_year_month(str1) +from vector_interval_1 order by dt +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt, + interval '1-2' year to month + interval '1-2' year to month, + interval_year_month(str1) + interval_year_month(str1), + interval '1-2' year to month + interval_year_month(str1), + interval '1-2' year to month - interval '1-2' year to month, + interval_year_month(str1) - interval_year_month(str1), + interval '1-2' year to month - interval_year_month(str1) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL 2-4 NULL NULL 0-0 NULL NULL +2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0 +PREHOOK: query: explain +select + dt, + interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, + interval_day_time(str2) + interval_day_time(str2), + interval '1 2:3:4' day to second + interval_day_time(str2), + interval '1 2:3:4' day to second - interval '1 2:3:4' day to second, + interval_day_time(str2) - interval_day_time(str2), + interval '1 2:3:4' day to second - interval_day_time(str2) +from vector_interval_1 order by dt +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + dt, + interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, + interval_day_time(str2) + interval_day_time(str2), + interval '1 2:3:4' day to second + interval_day_time(str2), + interval '1 2:3:4' day to second - interval '1 2:3:4' day to second, + interval_day_time(str2) - interval_day_time(str2), + interval '1 2:3:4' day to second - interval_day_time(str2) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt (type: date), 2 04:06:08.000000000 (type: interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), 0 00:00:00.000000000 (type: interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: 
interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt, + interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, + interval_day_time(str2) + interval_day_time(str2), + interval '1 2:3:4' day to second + interval_day_time(str2), + interval '1 2:3:4' day to second - interval '1 2:3:4' day to second, + interval_day_time(str2) - interval_day_time(str2), + interval '1 2:3:4' day to second - interval_day_time(str2) +from vector_interval_1 order by dt +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt, + interval '1 2:3:4' day to second + interval '1 2:3:4' day to second, + interval_day_time(str2) + interval_day_time(str2), + interval '1 2:3:4' day to second + interval_day_time(str2), + interval '1 2:3:4' day to second - interval '1 2:3:4' day to second, + interval_day_time(str2) - interval_day_time(str2), + interval '1 2:3:4' day to second - interval_day_time(str2) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL NULL +2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2 04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 +PREHOOK: query: -- date-interval arithmetic +explain +select + dt, + dt + interval '1-2' year to month, + dt + interval_year_month(str1), + interval '1-2' year to month + dt, + interval_year_month(str1) + dt, + dt - interval '1-2' year to month, + dt - interval_year_month(str1), + dt + interval '1 2:3:4' day to second, + dt + interval_day_time(str2), + interval '1 2:3:4' day to second + dt, + interval_day_time(str2) + dt, + dt - interval '1 2:3:4' day to second, + dt - interval_day_time(str2) +from vector_interval_1 order by dt +PREHOOK: type: QUERY +POSTHOOK: query: -- date-interval arithmetic +explain +select + dt, + dt + interval '1-2' year to month, + dt + interval_year_month(str1), + interval '1-2' year to month + dt, + interval_year_month(str1) + dt, + dt - interval '1-2' year to month, + dt - interval_year_month(str1), + dt + interval '1 2:3:4' day to second, + dt + interval_day_time(str2), + interval '1 2:3:4' day to second + dt, + interval_day_time(str2) + dt, + dt - interval '1 2:3:4' day to second, + dt - interval_day_time(str2) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt (type: date), (dt + 1-2) 
(type: date), (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (1-2 + dt) (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + dt) (type: date), (dt - 1-2) (type: date), (dt - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (dt + 1 02:03:04.000000000) (type: timestamp), (dt + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + dt) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + dt) (type: timestamp), (dt - 1 02:03:04.000000000) (type: timestamp), (dt - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt, + dt + interval '1-2' year to month, + dt + interval_year_month(str1), + interval '1-2' year to month + dt, + interval_year_month(str1) + dt, + dt - interval '1-2' year to month, + dt - interval_year_month(str1), + dt + interval '1 2:3:4' day to second, + dt + interval_day_time(str2), + interval '1 2:3:4' day to second + dt, + interval_day_time(str2) + dt, + dt - interval '1 2:3:4' day to second, + dt - interval_day_time(str2) +from vector_interval_1 order by dt +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt, + dt + interval '1-2' year to month, + dt + interval_year_month(str1), + interval '1-2' year to month + dt, + interval_year_month(str1) + dt, + dt - interval '1-2' year to month, + dt - interval_year_month(str1), + dt + interval '1 2:3:4' day to second, + dt + interval_day_time(str2), + interval '1 2:3:4' day to second + dt, + interval_day_time(str2) + dt, + dt - interval '1 2:3:4' day to second, + dt - interval_day_time(str2) +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL 
NULL NULL NULL NULL NULL NULL NULL NULL +2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01 1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56 2000-12-30 21:56:56 +PREHOOK: query: -- timestamp-interval arithmetic +explain +select + ts, + ts + interval '1-2' year to month, + ts + interval_year_month(str1), + interval '1-2' year to month + ts, + interval_year_month(str1) + ts, + ts - interval '1-2' year to month, + ts - interval_year_month(str1), + ts + interval '1 2:3:4' day to second, + ts + interval_day_time(str2), + interval '1 2:3:4' day to second + ts, + interval_day_time(str2) + ts, + ts - interval '1 2:3:4' day to second, + ts - interval_day_time(str2) +from vector_interval_1 order by ts +PREHOOK: type: QUERY +POSTHOOK: query: -- timestamp-interval arithmetic +explain +select + ts, + ts + interval '1-2' year to month, + ts + interval_year_month(str1), + interval '1-2' year to month + ts, + interval_year_month(str1) + ts, + ts - interval '1-2' year to month, + ts - interval_year_month(str1), + ts + interval '1 2:3:4' day to second, + ts + interval_day_time(str2), + interval '1 2:3:4' day to second + ts, + interval_day_time(str2) + ts, + ts - interval '1 2:3:4' day to second, + ts - interval_day_time(str2) +from vector_interval_1 order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts (type: timestamp), (ts + 1-2) (type: timestamp), (ts + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (1-2 + ts) (type: timestamp), (CAST( str1 AS INTERVAL YEAR TO MONTH) + ts) (type: timestamp), (ts - 1-2) (type: timestamp), (ts - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (ts + 1 02:03:04.000000000) (type: timestamp), (ts + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + ts) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + ts) (type: timestamp), (ts - 1 02:03:04.000000000) (type: timestamp), (ts - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: 
timestamp), VALUE._col11 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + ts, + ts + interval '1-2' year to month, + ts + interval_year_month(str1), + interval '1-2' year to month + ts, + interval_year_month(str1) + ts, + ts - interval '1-2' year to month, + ts - interval_year_month(str1), + ts + interval '1 2:3:4' day to second, + ts + interval_day_time(str2), + interval '1 2:3:4' day to second + ts, + interval_day_time(str2) + ts, + ts - interval '1 2:3:4' day to second, + ts - interval_day_time(str2) +from vector_interval_1 order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + ts, + ts + interval '1-2' year to month, + ts + interval_year_month(str1), + interval '1-2' year to month + ts, + interval_year_month(str1) + ts, + ts - interval '1-2' year to month, + ts - interval_year_month(str1), + ts + interval '1 2:3:4' day to second, + ts + interval_day_time(str2), + interval '1 2:3:4' day to second + ts, + interval_day_time(str2) + ts, + ts - interval '1 2:3:4' day to second, + ts - interval_day_time(str2) +from vector_interval_1 order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03 1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59 2000-12-30 22:58:59 +PREHOOK: query: -- timestamp-timestamp arithmetic +explain +select + ts, + ts - ts, + timestamp '2001-01-01 01:02:03' - ts, + ts - timestamp '2001-01-01 01:02:03' +from vector_interval_1 order by ts +PREHOOK: type: QUERY +POSTHOOK: query: -- timestamp-timestamp arithmetic +explain +select + ts, + ts - ts, + timestamp '2001-01-01 01:02:03' - ts, + ts - timestamp '2001-01-01 01:02:03' +from vector_interval_1 order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts (type: timestamp), (ts - ts) (type: interval_day_time), (2001-01-01 01:02:03.0 - ts) (type: interval_day_time), (ts - 2001-01-01 01:02:03.0) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: 
interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + ts, + ts - ts, + timestamp '2001-01-01 01:02:03' - ts, + ts - timestamp '2001-01-01 01:02:03' +from vector_interval_1 order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + ts, + ts - ts, + timestamp '2001-01-01 01:02:03' - ts, + ts - timestamp '2001-01-01 01:02:03' +from vector_interval_1 order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL NULL NULL NULL +2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 +PREHOOK: query: -- date-date arithmetic +explain +select + dt, + dt - dt, + date '2001-01-01' - dt, + dt - date '2001-01-01' +from vector_interval_1 order by dt +PREHOOK: type: QUERY +POSTHOOK: query: -- date-date arithmetic +explain +select + dt, + dt - dt, + date '2001-01-01' - dt, + dt - date '2001-01-01' +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt (type: date), (dt - dt) (type: interval_day_time), (2001-01-01 - dt) (type: interval_day_time), (dt - 2001-01-01) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt, + dt - dt, + date '2001-01-01' - dt, + dt - date '2001-01-01' +from vector_interval_1 order by dt +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt, + dt - dt, + date '2001-01-01' - dt, + dt - date '2001-01-01' +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL NULL NULL NULL +2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000 +PREHOOK: query: -- date-timestamp arithmetic +explain +select + dt, + ts - dt, + timestamp '2001-01-01 01:02:03' - dt, + ts - date '2001-01-01', + dt - ts, + dt - timestamp '2001-01-01 01:02:03', + date '2001-01-01' - ts +from vector_interval_1 order by dt +PREHOOK: type: QUERY +POSTHOOK: query: -- date-timestamp arithmetic +explain +select + dt, + ts - dt, + timestamp '2001-01-01 01:02:03' - dt, + ts - date '2001-01-01', + dt - ts, + dt - timestamp '2001-01-01 01:02:03', + date '2001-01-01' - ts +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_1 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt (type: date), (ts - dt) (type: interval_day_time), (2001-01-01 01:02:03.0 - dt) (type: interval_day_time), (ts - 2001-01-01) (type: interval_day_time), (dt - ts) (type: interval_day_time), (dt - 2001-01-01 01:02:03.0) (type: interval_day_time), (2001-01-01 - ts) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + dt, + ts - dt, + timestamp '2001-01-01 01:02:03' - dt, + ts - date '2001-01-01', + dt - ts, + dt - timestamp '2001-01-01 
01:02:03', + date '2001-01-01' - ts +from vector_interval_1 order by dt +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +POSTHOOK: query: select + dt, + ts - dt, + timestamp '2001-01-01 01:02:03' - dt, + ts - date '2001-01-01', + dt - ts, + dt - timestamp '2001-01-01 01:02:03', + date '2001-01-01' - ts +from vector_interval_1 order by dt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_1 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL +2001-01-01 0 01:02:03.000000000 0 01:02:03.000000000 0 01:02:03.000000000 -0 01:02:03.000000000 -0 01:02:03.000000000 -0 01:02:03.000000000 Index: ql/src/test/results/clientpositive/tez/vector_interval_2.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vector_interval_2.q.out (revision 0) +++ ql/src/test/results/clientpositive/tez/vector_interval_2.q.out (working copy) @@ -0,0 +1,1620 @@ +PREHOOK: query: drop table if exists vector_interval_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vector_interval_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vector_interval_2 (ts timestamp, dt date, str1 string, str2 string, str3 string, str4 string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_interval_2 +POSTHOOK: query: create table vector_interval_2 (ts timestamp, dt date, str1 string, str2 string, str3 string, str4 string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_interval_2 +PREHOOK: query: insert into vector_interval_2 + select timestamp '2001-01-01 01:02:03', date '2001-01-01', '1-2', '1-3', '1 2:3:4', '1 2:3:5' from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_interval_2 +POSTHOOK: query: insert into vector_interval_2 + select timestamp '2001-01-01 01:02:03', date '2001-01-01', '1-2', '1-3', '1 2:3:4', '1 2:3:5' from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_interval_2 +POSTHOOK: Lineage: vector_interval_2.dt SIMPLE [] +POSTHOOK: Lineage: vector_interval_2.str1 SIMPLE [] +POSTHOOK: Lineage: vector_interval_2.str2 SIMPLE [] +POSTHOOK: Lineage: vector_interval_2.str3 SIMPLE [] +POSTHOOK: Lineage: vector_interval_2.str4 SIMPLE [] +POSTHOOK: Lineage: vector_interval_2.ts SIMPLE [] +PREHOOK: query: insert into vector_interval_2 + select null, null, null, null, null, null from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@vector_interval_2 +POSTHOOK: query: insert into vector_interval_2 + select null, null, null, null, null, null from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@vector_interval_2 +POSTHOOK: Lineage: vector_interval_2.dt EXPRESSION [] +POSTHOOK: Lineage: vector_interval_2.str1 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_2.str2 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_2.str3 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_2.str4 EXPRESSION [] +POSTHOOK: Lineage: vector_interval_2.ts EXPRESSION [] +PREHOOK: query: -- interval comparisons in select clause + +explain +select + str1, + -- Should all be true + interval_year_month(str1) = interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str2), + interval_year_month(str1) < 
interval_year_month(str2), + interval_year_month(str1) >= interval_year_month(str1), + interval_year_month(str2) >= interval_year_month(str1), + interval_year_month(str2) > interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str2), + + interval_year_month(str1) = interval '1-2' year to month, + interval_year_month(str1) <= interval '1-2' year to month, + interval_year_month(str1) <= interval '1-3' year to month, + interval_year_month(str1) < interval '1-3' year to month, + interval_year_month(str1) >= interval '1-2' year to month, + interval_year_month(str2) >= interval '1-2' year to month, + interval_year_month(str2) > interval '1-2' year to month, + interval_year_month(str1) != interval '1-3' year to month, + + interval '1-2' year to month = interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str2), + interval '1-2' year to month < interval_year_month(str2), + interval '1-2' year to month >= interval_year_month(str1), + interval '1-3' year to month >= interval_year_month(str1), + interval '1-3' year to month > interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str2) +from vector_interval_2 order by str1 +PREHOOK: type: QUERY +POSTHOOK: query: -- interval comparisons in select clause + +explain +select + str1, + -- Should all be true + interval_year_month(str1) = interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str2), + interval_year_month(str1) < interval_year_month(str2), + interval_year_month(str1) >= interval_year_month(str1), + interval_year_month(str2) >= interval_year_month(str1), + interval_year_month(str2) > interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str2), + + interval_year_month(str1) = interval '1-2' year to month, + interval_year_month(str1) <= interval '1-2' year to month, + interval_year_month(str1) <= interval '1-3' year to month, + interval_year_month(str1) < interval '1-3' year to month, + interval_year_month(str1) >= interval '1-2' year to month, + interval_year_month(str2) >= interval '1-2' year to month, + interval_year_month(str2) > interval '1-2' year to month, + interval_year_month(str1) != interval '1-3' year to month, + + interval '1-2' year to month = interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str2), + interval '1-2' year to month < interval_year_month(str2), + interval '1-2' year to month >= interval_year_month(str1), + interval '1-3' year to month >= interval_year_month(str1), + interval '1-3' year to month > interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str2) +from vector_interval_2 order by str1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 
CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3) (type: boolean), (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, 
_col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + str1, + -- Should all be true + interval_year_month(str1) = interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str2), + interval_year_month(str1) < interval_year_month(str2), + interval_year_month(str1) >= interval_year_month(str1), + interval_year_month(str2) >= interval_year_month(str1), + interval_year_month(str2) > interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str2), + + interval_year_month(str1) = interval '1-2' year to month, + interval_year_month(str1) <= interval '1-2' year to month, + interval_year_month(str1) <= interval '1-3' year to month, + interval_year_month(str1) < interval '1-3' year to month, + interval_year_month(str1) >= interval '1-2' year to month, + interval_year_month(str2) >= interval '1-2' year to month, + interval_year_month(str2) > interval '1-2' year to month, + interval_year_month(str1) != interval '1-3' year to month, + + interval '1-2' year to month = interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str2), + interval '1-2' year to month < interval_year_month(str2), + interval '1-2' year to month >= interval_year_month(str1), + interval '1-3' year to month >= interval_year_month(str1), + interval '1-3' year to month > interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str2) +from vector_interval_2 order by str1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select + str1, + -- Should all be true + interval_year_month(str1) = interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str1), + interval_year_month(str1) <= interval_year_month(str2), + interval_year_month(str1) < interval_year_month(str2), + interval_year_month(str1) >= interval_year_month(str1), + interval_year_month(str2) >= interval_year_month(str1), + interval_year_month(str2) > interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str2), + + interval_year_month(str1) = interval '1-2' year to month, + interval_year_month(str1) <= interval '1-2' year to month, + interval_year_month(str1) <= interval '1-3' year to month, + interval_year_month(str1) < interval '1-3' year to month, + interval_year_month(str1) >= interval '1-2' year to month, + interval_year_month(str2) >= interval '1-2' year to month, + interval_year_month(str2) > interval '1-2' year to month, + interval_year_month(str1) != interval '1-3' year to month, + + interval '1-2' year to month = interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str1), + interval '1-2' year to month <= interval_year_month(str2), + interval '1-2' year to month < interval_year_month(str2), + interval '1-2' year to month >= 
interval_year_month(str1), + interval '1-3' year to month >= interval_year_month(str1), + interval '1-3' year to month > interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str2) +from vector_interval_2 order by str1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1-2 true true true true true true true true true true true true true true true true true true true true true true true true +PREHOOK: query: explain +select + str1, + -- Should all be false + interval_year_month(str1) != interval_year_month(str1), + interval_year_month(str1) >= interval_year_month(str2), + interval_year_month(str1) > interval_year_month(str2), + interval_year_month(str2) <= interval_year_month(str1), + interval_year_month(str2) < interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str1), + + interval_year_month(str1) != interval '1-2' year to month, + interval_year_month(str1) >= interval '1-3' year to month, + interval_year_month(str1) > interval '1-3' year to month, + interval_year_month(str2) <= interval '1-2' year to month, + interval_year_month(str2) < interval '1-2' year to month, + interval_year_month(str1) != interval '1-2' year to month, + + interval '1-2' year to month != interval_year_month(str1), + interval '1-2' year to month >= interval_year_month(str2), + interval '1-2' year to month > interval_year_month(str2), + interval '1-3' year to month <= interval_year_month(str1), + interval '1-3' year to month < interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str1) +from vector_interval_2 order by str1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + str1, + -- Should all be false + interval_year_month(str1) != interval_year_month(str1), + interval_year_month(str1) >= interval_year_month(str2), + interval_year_month(str1) > interval_year_month(str2), + interval_year_month(str2) <= interval_year_month(str1), + interval_year_month(str2) < interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str1), + + interval_year_month(str1) != interval '1-2' year to month, + interval_year_month(str1) >= interval '1-3' year to month, + interval_year_month(str1) > interval '1-3' year to month, + interval_year_month(str2) <= interval '1-2' year to month, + interval_year_month(str2) < interval '1-2' year to month, + interval_year_month(str1) != interval '1-2' year to month, + + interval '1-2' year to month != interval_year_month(str1), + interval '1-2' year to month >= interval_year_month(str2), + interval '1-2' year to month > interval_year_month(str2), + interval '1-3' year to month <= interval_year_month(str1), + interval '1-3' year to month < interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str1) +from vector_interval_2 order by str1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str1 (type: string), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL 
YEAR TO MONTH) <= 1-2) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < 1-2) (type: boolean), (1-2 <> CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) < CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) > 1-3) (type: boolean) + outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + str1, + -- Should all be false + interval_year_month(str1) != interval_year_month(str1), + interval_year_month(str1) >= interval_year_month(str2), + interval_year_month(str1) > interval_year_month(str2), + interval_year_month(str2) <= interval_year_month(str1), + interval_year_month(str2) < interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str1), + + interval_year_month(str1) != interval '1-2' year to 
month, + interval_year_month(str1) >= interval '1-3' year to month, + interval_year_month(str1) > interval '1-3' year to month, + interval_year_month(str2) <= interval '1-2' year to month, + interval_year_month(str2) < interval '1-2' year to month, + interval_year_month(str1) != interval '1-2' year to month, + + interval '1-2' year to month != interval_year_month(str1), + interval '1-2' year to month >= interval_year_month(str2), + interval '1-2' year to month > interval_year_month(str2), + interval '1-3' year to month <= interval_year_month(str1), + interval '1-3' year to month < interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str1) +from vector_interval_2 order by str1 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select + str1, + -- Should all be false + interval_year_month(str1) != interval_year_month(str1), + interval_year_month(str1) >= interval_year_month(str2), + interval_year_month(str1) > interval_year_month(str2), + interval_year_month(str2) <= interval_year_month(str1), + interval_year_month(str2) < interval_year_month(str1), + interval_year_month(str1) != interval_year_month(str1), + + interval_year_month(str1) != interval '1-2' year to month, + interval_year_month(str1) >= interval '1-3' year to month, + interval_year_month(str1) > interval '1-3' year to month, + interval_year_month(str2) <= interval '1-2' year to month, + interval_year_month(str2) < interval '1-2' year to month, + interval_year_month(str1) != interval '1-2' year to month, + + interval '1-2' year to month != interval_year_month(str1), + interval '1-2' year to month >= interval_year_month(str2), + interval '1-2' year to month > interval_year_month(str2), + interval '1-3' year to month <= interval_year_month(str1), + interval '1-3' year to month < interval_year_month(str1), + interval '1-2' year to month != interval_year_month(str1) +from vector_interval_2 order by str1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1-2 false false false false false false false false false false false false false false false false false false +PREHOOK: query: explain +select + str3, + -- Should all be true + interval_day_time(str3) = interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str4), + interval_day_time(str3) < interval_day_time(str4), + interval_day_time(str3) >= interval_day_time(str3), + interval_day_time(str4) >= interval_day_time(str3), + interval_day_time(str4) > interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str4), + + interval_day_time(str3) = interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:5' day to second, + interval_day_time(str3) < interval '1 2:3:5' day to second, + interval_day_time(str3) >= interval '1 2:3:4' day to second, + interval_day_time(str4) >= interval '1 2:3:4' day to second, + interval_day_time(str4) > interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:5' day to second, + + interval '1 2:3:4' day to second = interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str4), + interval '1 2:3:4' day to second < interval_day_time(str4), + 
interval '1 2:3:4' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second > interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str4) +from vector_interval_2 order by str3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + str3, + -- Should all be true + interval_day_time(str3) = interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str4), + interval_day_time(str3) < interval_day_time(str4), + interval_day_time(str3) >= interval_day_time(str3), + interval_day_time(str4) >= interval_day_time(str3), + interval_day_time(str4) > interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str4), + + interval_day_time(str3) = interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:5' day to second, + interval_day_time(str3) < interval '1 2:3:5' day to second, + interval_day_time(str3) >= interval '1 2:3:4' day to second, + interval_day_time(str4) >= interval '1 2:3:4' day to second, + interval_day_time(str4) > interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:5' day to second, + + interval '1 2:3:4' day to second = interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str4), + interval '1 2:3:4' day to second < interval_day_time(str4), + interval '1 2:3:4' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second > interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str4) +from vector_interval_2 order by str3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000) (type: boolean), 
(CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000) (type: boolean), (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col15 (type: boolean), VALUE._col16 (type: boolean), VALUE._col17 (type: boolean), VALUE._col18 (type: boolean), VALUE._col19 (type: boolean), VALUE._col20 (type: boolean), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean), VALUE._col23 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + str3, + -- Should all be true + interval_day_time(str3) = interval_day_time(str3), + interval_day_time(str3) <= 
interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str4), + interval_day_time(str3) < interval_day_time(str4), + interval_day_time(str3) >= interval_day_time(str3), + interval_day_time(str4) >= interval_day_time(str3), + interval_day_time(str4) > interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str4), + + interval_day_time(str3) = interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:5' day to second, + interval_day_time(str3) < interval '1 2:3:5' day to second, + interval_day_time(str3) >= interval '1 2:3:4' day to second, + interval_day_time(str4) >= interval '1 2:3:4' day to second, + interval_day_time(str4) > interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:5' day to second, + + interval '1 2:3:4' day to second = interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str4), + interval '1 2:3:4' day to second < interval_day_time(str4), + interval '1 2:3:4' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second > interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str4) +from vector_interval_2 order by str3 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select + str3, + -- Should all be true + interval_day_time(str3) = interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str3), + interval_day_time(str3) <= interval_day_time(str4), + interval_day_time(str3) < interval_day_time(str4), + interval_day_time(str3) >= interval_day_time(str3), + interval_day_time(str4) >= interval_day_time(str3), + interval_day_time(str4) > interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str4), + + interval_day_time(str3) = interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:4' day to second, + interval_day_time(str3) <= interval '1 2:3:5' day to second, + interval_day_time(str3) < interval '1 2:3:5' day to second, + interval_day_time(str3) >= interval '1 2:3:4' day to second, + interval_day_time(str4) >= interval '1 2:3:4' day to second, + interval_day_time(str4) > interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:5' day to second, + + interval '1 2:3:4' day to second = interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str3), + interval '1 2:3:4' day to second <= interval_day_time(str4), + interval '1 2:3:4' day to second < interval_day_time(str4), + interval '1 2:3:4' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second >= interval_day_time(str3), + interval '1 2:3:5' day to second > interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str4) +from vector_interval_2 order by str3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1 2:3:4 true true true true true true true true true true true true true true true true true true true true true true true true +PREHOOK: query: explain +select + str3, + -- Should all be false + interval_day_time(str3) != interval_day_time(str3), + interval_day_time(str3) >= interval_day_time(str4), 
+ interval_day_time(str3) > interval_day_time(str4), + interval_day_time(str4) <= interval_day_time(str3), + interval_day_time(str4) < interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str3), + + interval_day_time(str3) != interval '1 2:3:4' day to second, + interval_day_time(str3) >= interval '1 2:3:5' day to second, + interval_day_time(str3) > interval '1 2:3:5' day to second, + interval_day_time(str4) <= interval '1 2:3:4' day to second, + interval_day_time(str4) < interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:4' day to second, + + interval '1 2:3:4' day to second != interval_day_time(str3), + interval '1 2:3:4' day to second >= interval_day_time(str4), + interval '1 2:3:4' day to second > interval_day_time(str4), + interval '1 2:3:5' day to second <= interval_day_time(str3), + interval '1 2:3:5' day to second < interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str3) +from vector_interval_2 order by str3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + str3, + -- Should all be false + interval_day_time(str3) != interval_day_time(str3), + interval_day_time(str3) >= interval_day_time(str4), + interval_day_time(str3) > interval_day_time(str4), + interval_day_time(str4) <= interval_day_time(str3), + interval_day_time(str4) < interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str3), + + interval_day_time(str3) != interval '1 2:3:4' day to second, + interval_day_time(str3) >= interval '1 2:3:5' day to second, + interval_day_time(str3) > interval '1 2:3:5' day to second, + interval_day_time(str4) <= interval '1 2:3:4' day to second, + interval_day_time(str4) < interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:4' day to second, + + interval '1 2:3:4' day to second != interval_day_time(str3), + interval '1 2:3:4' day to second >= interval_day_time(str4), + interval '1 2:3:4' day to second > interval_day_time(str4), + interval '1 2:3:5' day to second <= interval_day_time(str3), + interval '1 2:3:5' day to second < interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str3) +from vector_interval_2 order by str3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str3 (type: string), (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= 1 02:03:04.000000000) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < 1 02:03:04.000000000) (type: boolean), (1 02:03:04.000000000 <> CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:04.000000000 > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 <= CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (1 02:03:05.000000000 < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > CAST( str4 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) <= 
CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str4 AS INTERVAL DAY TO SECOND) < CAST( str3 AS INTERVAL DAY TO SECOND)) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:04.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) >= 1 02:03:05.000000000) (type: boolean), (CAST( str3 AS INTERVAL DAY TO SECOND) > 1 02:03:05.000000000) (type: boolean) + outputColumnNames: _col0, _col1, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col2, _col3, _col4, _col5, _col7, _col8, _col9 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col0 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean), VALUE._col9 (type: boolean), VALUE._col5 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean), VALUE._col12 (type: boolean), VALUE._col13 (type: boolean), VALUE._col14 (type: boolean), VALUE._col10 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + str3, + -- Should all be false + interval_day_time(str3) != interval_day_time(str3), + interval_day_time(str3) >= interval_day_time(str4), + interval_day_time(str3) > interval_day_time(str4), + interval_day_time(str4) <= interval_day_time(str3), + interval_day_time(str4) < interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str3), + + interval_day_time(str3) != interval '1 2:3:4' day to second, + interval_day_time(str3) >= interval '1 2:3:5' day to second, + interval_day_time(str3) > interval '1 2:3:5' day to second, + interval_day_time(str4) <= interval '1 2:3:4' day to second, + interval_day_time(str4) < interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:4' day to second, + + interval '1 2:3:4' day to second != interval_day_time(str3), + interval '1 2:3:4' day to second >= interval_day_time(str4), + interval '1 2:3:4' day to second > interval_day_time(str4), + interval '1 2:3:5' day to second <= interval_day_time(str3), + interval '1 2:3:5' day to second < 
interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str3) +from vector_interval_2 order by str3 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select + str3, + -- Should all be false + interval_day_time(str3) != interval_day_time(str3), + interval_day_time(str3) >= interval_day_time(str4), + interval_day_time(str3) > interval_day_time(str4), + interval_day_time(str4) <= interval_day_time(str3), + interval_day_time(str4) < interval_day_time(str3), + interval_day_time(str3) != interval_day_time(str3), + + interval_day_time(str3) != interval '1 2:3:4' day to second, + interval_day_time(str3) >= interval '1 2:3:5' day to second, + interval_day_time(str3) > interval '1 2:3:5' day to second, + interval_day_time(str4) <= interval '1 2:3:4' day to second, + interval_day_time(str4) < interval '1 2:3:4' day to second, + interval_day_time(str3) != interval '1 2:3:4' day to second, + + interval '1 2:3:4' day to second != interval_day_time(str3), + interval '1 2:3:4' day to second >= interval_day_time(str4), + interval '1 2:3:4' day to second > interval_day_time(str4), + interval '1 2:3:5' day to second <= interval_day_time(str3), + interval '1 2:3:5' day to second < interval_day_time(str3), + interval '1 2:3:4' day to second != interval_day_time(str3) +from vector_interval_2 order by str3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1 2:3:4 false false false false false false false false false false false false false false false false false false +PREHOOK: query: -- interval expressions in predicates +explain +select ts from vector_interval_2 +where + interval_year_month(str1) = interval_year_month(str1) + and interval_year_month(str1) != interval_year_month(str2) + and interval_year_month(str1) <= interval_year_month(str2) + and interval_year_month(str1) < interval_year_month(str2) + and interval_year_month(str2) >= interval_year_month(str1) + and interval_year_month(str2) > interval_year_month(str1) + + and interval_year_month(str1) = interval '1-2' year to month + and interval_year_month(str1) != interval '1-3' year to month + and interval_year_month(str1) <= interval '1-3' year to month + and interval_year_month(str1) < interval '1-3' year to month + and interval_year_month(str2) >= interval '1-2' year to month + and interval_year_month(str2) > interval '1-2' year to month + + and interval '1-2' year to month = interval_year_month(str1) + and interval '1-2' year to month != interval_year_month(str2) + and interval '1-2' year to month <= interval_year_month(str2) + and interval '1-2' year to month < interval_year_month(str2) + and interval '1-3' year to month >= interval_year_month(str1) + and interval '1-3' year to month > interval_year_month(str1) +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: -- interval expressions in predicates +explain +select ts from vector_interval_2 +where + interval_year_month(str1) = interval_year_month(str1) + and interval_year_month(str1) != interval_year_month(str2) + and interval_year_month(str1) <= interval_year_month(str2) + and interval_year_month(str1) < interval_year_month(str2) + and interval_year_month(str2) >= interval_year_month(str1) + and interval_year_month(str2) > interval_year_month(str1) + + and interval_year_month(str1) = interval '1-2' year to month + and interval_year_month(str1) != 
interval '1-3' year to month + and interval_year_month(str1) <= interval '1-3' year to month + and interval_year_month(str1) < interval '1-3' year to month + and interval_year_month(str2) >= interval '1-2' year to month + and interval_year_month(str2) > interval '1-2' year to month + + and interval '1-2' year to month = interval_year_month(str1) + and interval '1-2' year to month != interval_year_month(str2) + and interval '1-2' year to month <= interval_year_month(str2) + and interval '1-2' year to month < interval_year_month(str2) + and interval '1-3' year to month >= interval_year_month(str1) + and interval '1-3' year to month > interval_year_month(str1) +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((((((CAST( str1 AS INTERVAL YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH))) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS INTERVAL YEAR TO MONTH))) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH))) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH))) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 AS INTERVAL YEAR TO MONTH))) and (CAST( str1 AS INTERVAL YEAR TO MONTH) = 1-2)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <> 1-3)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) <= 1-3)) and (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2)) and (CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2)) and (1-2 = CAST( str1 AS INTERVAL YEAR TO MONTH))) and (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH))) and (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH))) and (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH))) and (1-3 >= CAST( str1 AS INTERVAL YEAR TO MONTH))) and (1-3 > CAST( str1 AS INTERVAL YEAR TO MONTH))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + interval_year_month(str1) = interval_year_month(str1) + and interval_year_month(str1) != interval_year_month(str2) + and interval_year_month(str1) <= interval_year_month(str2) + 
and interval_year_month(str1) < interval_year_month(str2) + and interval_year_month(str2) >= interval_year_month(str1) + and interval_year_month(str2) > interval_year_month(str1) + + and interval_year_month(str1) = interval '1-2' year to month + and interval_year_month(str1) != interval '1-3' year to month + and interval_year_month(str1) <= interval '1-3' year to month + and interval_year_month(str1) < interval '1-3' year to month + and interval_year_month(str2) >= interval '1-2' year to month + and interval_year_month(str2) > interval '1-2' year to month + + and interval '1-2' year to month = interval_year_month(str1) + and interval '1-2' year to month != interval_year_month(str2) + and interval '1-2' year to month <= interval_year_month(str2) + and interval '1-2' year to month < interval_year_month(str2) + and interval '1-3' year to month >= interval_year_month(str1) + and interval '1-3' year to month > interval_year_month(str1) +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + interval_year_month(str1) = interval_year_month(str1) + and interval_year_month(str1) != interval_year_month(str2) + and interval_year_month(str1) <= interval_year_month(str2) + and interval_year_month(str1) < interval_year_month(str2) + and interval_year_month(str2) >= interval_year_month(str1) + and interval_year_month(str2) > interval_year_month(str1) + + and interval_year_month(str1) = interval '1-2' year to month + and interval_year_month(str1) != interval '1-3' year to month + and interval_year_month(str1) <= interval '1-3' year to month + and interval_year_month(str1) < interval '1-3' year to month + and interval_year_month(str2) >= interval '1-2' year to month + and interval_year_month(str2) > interval '1-2' year to month + + and interval '1-2' year to month = interval_year_month(str1) + and interval '1-2' year to month != interval_year_month(str2) + and interval '1-2' year to month <= interval_year_month(str2) + and interval '1-2' year to month < interval_year_month(str2) + and interval '1-3' year to month >= interval_year_month(str1) + and interval '1-3' year to month > interval_year_month(str1) +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: explain +select ts from vector_interval_2 +where + interval_day_time(str3) = interval_day_time(str3) + and interval_day_time(str3) != interval_day_time(str4) + and interval_day_time(str3) <= interval_day_time(str4) + and interval_day_time(str3) < interval_day_time(str4) + and interval_day_time(str4) >= interval_day_time(str3) + and interval_day_time(str4) > interval_day_time(str3) + + and interval_day_time(str3) = interval '1 2:3:4' day to second + and interval_day_time(str3) != interval '1 2:3:5' day to second + and interval_day_time(str3) <= interval '1 2:3:5' day to second + and interval_day_time(str3) < interval '1 2:3:5' day to second + and interval_day_time(str4) >= interval '1 2:3:4' day to second + and interval_day_time(str4) > interval '1 2:3:4' day to second + + and interval '1 2:3:4' day to second = interval_day_time(str3) + and interval '1 2:3:4' day to second != interval_day_time(str4) + and interval '1 2:3:4' day to second <= interval_day_time(str4) + and interval '1 2:3:4' day to second < interval_day_time(str4) + and interval '1 2:3:5' day to second >= interval_day_time(str3) + and interval '1 2:3:5' day to second > 
interval_day_time(str3) +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ts from vector_interval_2 +where + interval_day_time(str3) = interval_day_time(str3) + and interval_day_time(str3) != interval_day_time(str4) + and interval_day_time(str3) <= interval_day_time(str4) + and interval_day_time(str3) < interval_day_time(str4) + and interval_day_time(str4) >= interval_day_time(str3) + and interval_day_time(str4) > interval_day_time(str3) + + and interval_day_time(str3) = interval '1 2:3:4' day to second + and interval_day_time(str3) != interval '1 2:3:5' day to second + and interval_day_time(str3) <= interval '1 2:3:5' day to second + and interval_day_time(str3) < interval '1 2:3:5' day to second + and interval_day_time(str4) >= interval '1 2:3:4' day to second + and interval_day_time(str4) > interval '1 2:3:4' day to second + + and interval '1 2:3:4' day to second = interval_day_time(str3) + and interval '1 2:3:4' day to second != interval_day_time(str4) + and interval '1 2:3:4' day to second <= interval_day_time(str4) + and interval '1 2:3:4' day to second < interval_day_time(str4) + and interval '1 2:3:5' day to second >= interval_day_time(str3) + and interval '1 2:3:5' day to second > interval_day_time(str3) +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((((((CAST( str3 AS INTERVAL DAY TO SECOND) = CAST( str3 AS INTERVAL DAY TO SECOND)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> CAST( str4 AS INTERVAL DAY TO SECOND))) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= CAST( str4 AS INTERVAL DAY TO SECOND))) and (CAST( str3 AS INTERVAL DAY TO SECOND) < CAST( str4 AS INTERVAL DAY TO SECOND))) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= CAST( str3 AS INTERVAL DAY TO SECOND))) and (CAST( str4 AS INTERVAL DAY TO SECOND) > CAST( str3 AS INTERVAL DAY TO SECOND))) and (CAST( str3 AS INTERVAL DAY TO SECOND) = 1 02:03:04.000000000)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <> 1 02:03:05.000000000)) and (CAST( str3 AS INTERVAL DAY TO SECOND) <= 1 02:03:05.000000000)) and (CAST( str3 AS INTERVAL DAY TO SECOND) < 1 02:03:05.000000000)) and (CAST( str4 AS INTERVAL DAY TO SECOND) >= 1 02:03:04.000000000)) and (CAST( str4 AS INTERVAL DAY TO SECOND) > 1 02:03:04.000000000)) and (1 02:03:04.000000000 = CAST( str3 AS INTERVAL DAY TO SECOND))) and (1 02:03:04.000000000 <> CAST( str4 AS INTERVAL DAY TO SECOND))) and (1 02:03:04.000000000 <= CAST( str4 AS INTERVAL DAY TO SECOND))) and (1 02:03:04.000000000 < CAST( str4 AS INTERVAL DAY TO SECOND))) and (1 02:03:05.000000000 >= CAST( str3 AS INTERVAL DAY TO SECOND))) and (1 02:03:05.000000000 > CAST( str3 AS INTERVAL DAY TO SECOND))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: 
KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + interval_day_time(str3) = interval_day_time(str3) + and interval_day_time(str3) != interval_day_time(str4) + and interval_day_time(str3) <= interval_day_time(str4) + and interval_day_time(str3) < interval_day_time(str4) + and interval_day_time(str4) >= interval_day_time(str3) + and interval_day_time(str4) > interval_day_time(str3) + + and interval_day_time(str3) = interval '1 2:3:4' day to second + and interval_day_time(str3) != interval '1 2:3:5' day to second + and interval_day_time(str3) <= interval '1 2:3:5' day to second + and interval_day_time(str3) < interval '1 2:3:5' day to second + and interval_day_time(str4) >= interval '1 2:3:4' day to second + and interval_day_time(str4) > interval '1 2:3:4' day to second + + and interval '1 2:3:4' day to second = interval_day_time(str3) + and interval '1 2:3:4' day to second != interval_day_time(str4) + and interval '1 2:3:4' day to second <= interval_day_time(str4) + and interval '1 2:3:4' day to second < interval_day_time(str4) + and interval '1 2:3:5' day to second >= interval_day_time(str3) + and interval '1 2:3:5' day to second > interval_day_time(str3) +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + interval_day_time(str3) = interval_day_time(str3) + and interval_day_time(str3) != interval_day_time(str4) + and interval_day_time(str3) <= interval_day_time(str4) + and interval_day_time(str3) < interval_day_time(str4) + and interval_day_time(str4) >= interval_day_time(str3) + and interval_day_time(str4) > interval_day_time(str3) + + and interval_day_time(str3) = interval '1 2:3:4' day to second + and interval_day_time(str3) != interval '1 2:3:5' day to second + and interval_day_time(str3) <= interval '1 2:3:5' day to second + and interval_day_time(str3) < interval '1 2:3:5' day to second + and interval_day_time(str4) >= interval '1 2:3:4' day to second + and interval_day_time(str4) > interval '1 2:3:4' day to second + + and interval '1 2:3:4' day to second = interval_day_time(str3) + and interval '1 2:3:4' day to second != interval_day_time(str4) + and interval '1 2:3:4' day to second <= interval_day_time(str4) + and interval '1 2:3:4' day to second < interval_day_time(str4) + and interval '1 2:3:5' day to second >= interval_day_time(str3) + and interval '1 2:3:5' day to second > interval_day_time(str3) +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: explain +select ts from vector_interval_2 +where + date '2002-03-01' = dt + interval_year_month(str1) + and date '2002-03-01' <= dt + interval_year_month(str1) + and date '2002-03-01' >= dt + interval_year_month(str1) + and dt + interval_year_month(str1) = date '2002-03-01' + and dt + interval_year_month(str1) <= date '2002-03-01' + and dt + 
interval_year_month(str1) >= date '2002-03-01' + and dt != dt + interval_year_month(str1) + + and date '2002-03-01' = dt + interval '1-2' year to month + and date '2002-03-01' <= dt + interval '1-2' year to month + and date '2002-03-01' >= dt + interval '1-2' year to month + and dt + interval '1-2' year to month = date '2002-03-01' + and dt + interval '1-2' year to month <= date '2002-03-01' + and dt + interval '1-2' year to month >= date '2002-03-01' + and dt != dt + interval '1-2' year to month +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ts from vector_interval_2 +where + date '2002-03-01' = dt + interval_year_month(str1) + and date '2002-03-01' <= dt + interval_year_month(str1) + and date '2002-03-01' >= dt + interval_year_month(str1) + and dt + interval_year_month(str1) = date '2002-03-01' + and dt + interval_year_month(str1) <= date '2002-03-01' + and dt + interval_year_month(str1) >= date '2002-03-01' + and dt != dt + interval_year_month(str1) + + and date '2002-03-01' = dt + interval '1-2' year to month + and date '2002-03-01' <= dt + interval '1-2' year to month + and date '2002-03-01' >= dt + interval '1-2' year to month + and dt + interval '1-2' year to month = date '2002-03-01' + and dt + interval '1-2' year to month <= date '2002-03-01' + and dt + interval '1-2' year to month >= date '2002-03-01' + and dt != dt + interval '1-2' year to month +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((2002-03-01 = (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (2002-03-01 <= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)))) and (2002-03-01 >= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)))) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) = 2002-03-01)) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) <= 2002-03-01)) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) >= 2002-03-01)) and (dt <> (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)))) and (2002-03-01 = (dt + 1-2))) and (2002-03-01 <= (dt + 1-2))) and (2002-03-01 >= (dt + 1-2))) and ((dt + 1-2) = 2002-03-01)) and ((dt + 1-2) <= 2002-03-01)) and ((dt + 1-2) >= 2002-03-01)) and (dt <> (dt + 1-2))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + 
Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + date '2002-03-01' = dt + interval_year_month(str1) + and date '2002-03-01' <= dt + interval_year_month(str1) + and date '2002-03-01' >= dt + interval_year_month(str1) + and dt + interval_year_month(str1) = date '2002-03-01' + and dt + interval_year_month(str1) <= date '2002-03-01' + and dt + interval_year_month(str1) >= date '2002-03-01' + and dt != dt + interval_year_month(str1) + + and date '2002-03-01' = dt + interval '1-2' year to month + and date '2002-03-01' <= dt + interval '1-2' year to month + and date '2002-03-01' >= dt + interval '1-2' year to month + and dt + interval '1-2' year to month = date '2002-03-01' + and dt + interval '1-2' year to month <= date '2002-03-01' + and dt + interval '1-2' year to month >= date '2002-03-01' + and dt != dt + interval '1-2' year to month +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + date '2002-03-01' = dt + interval_year_month(str1) + and date '2002-03-01' <= dt + interval_year_month(str1) + and date '2002-03-01' >= dt + interval_year_month(str1) + and dt + interval_year_month(str1) = date '2002-03-01' + and dt + interval_year_month(str1) <= date '2002-03-01' + and dt + interval_year_month(str1) >= date '2002-03-01' + and dt != dt + interval_year_month(str1) + + and date '2002-03-01' = dt + interval '1-2' year to month + and date '2002-03-01' <= dt + interval '1-2' year to month + and date '2002-03-01' >= dt + interval '1-2' year to month + and dt + interval '1-2' year to month = date '2002-03-01' + and dt + interval '1-2' year to month <= date '2002-03-01' + and dt + interval '1-2' year to month >= date '2002-03-01' + and dt != dt + interval '1-2' year to month +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: explain +select ts from vector_interval_2 +where + timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' <= ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' >= ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' != ts + interval '1-2' year to month + and timestamp '2002-02-01 01:02:03' < ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' > ts + interval '1-2' year to month + + and ts + interval '1-2' year to month = timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month >= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month <= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month != timestamp '2002-04-01 01:02:03' + and ts + interval '1-2' year to month > timestamp '2002-02-01 01:02:03' + and ts + interval '1-2' year to month < timestamp '2002-04-01 01:02:03' + + and ts = ts + interval '0' year + and ts != ts + interval '1' year + and ts <= ts + interval '1' year + and ts < ts + interval '1' year + and ts >= ts - interval '1' year + and ts > ts - interval '1' year +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ts from vector_interval_2 +where + timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' <= ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' >= ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' != ts + interval '1-2' year to month 
+ and timestamp '2002-02-01 01:02:03' < ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' > ts + interval '1-2' year to month + + and ts + interval '1-2' year to month = timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month >= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month <= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month != timestamp '2002-04-01 01:02:03' + and ts + interval '1-2' year to month > timestamp '2002-02-01 01:02:03' + and ts + interval '1-2' year to month < timestamp '2002-04-01 01:02:03' + + and ts = ts + interval '0' year + and ts != ts + interval '1' year + and ts <= ts + interval '1' year + and ts < ts + interval '1' year + and ts >= ts - interval '1' year + and ts > ts - interval '1' year +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((((((2002-03-01 01:02:03.0 = (ts + 1-2)) and (2002-03-01 01:02:03.0 <= (ts + 1-2))) and (2002-03-01 01:02:03.0 >= (ts + 1-2))) and (2002-04-01 01:02:03.0 <> (ts + 1-2))) and (2002-02-01 01:02:03.0 < (ts + 1-2))) and (2002-04-01 01:02:03.0 > (ts + 1-2))) and ((ts + 1-2) = 2002-03-01 01:02:03.0)) and ((ts + 1-2) >= 2002-03-01 01:02:03.0)) and ((ts + 1-2) <= 2002-03-01 01:02:03.0)) and ((ts + 1-2) <> 2002-04-01 01:02:03.0)) and ((ts + 1-2) > 2002-02-01 01:02:03.0)) and ((ts + 1-2) < 2002-04-01 01:02:03.0)) and (ts = (ts + 0-0))) and (ts <> (ts + 1-0))) and (ts <= (ts + 1-0))) and (ts < (ts + 1-0))) and (ts >= (ts - 1-0))) and (ts > (ts - 1-0))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' <= ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' >= ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' != ts + interval '1-2' year to month + and timestamp '2002-02-01 01:02:03' < ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' > ts + interval '1-2' year to month + + and ts + interval '1-2' year to month = timestamp '2002-03-01 01:02:03' 
+ and ts + interval '1-2' year to month >= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month <= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month != timestamp '2002-04-01 01:02:03' + and ts + interval '1-2' year to month > timestamp '2002-02-01 01:02:03' + and ts + interval '1-2' year to month < timestamp '2002-04-01 01:02:03' + + and ts = ts + interval '0' year + and ts != ts + interval '1' year + and ts <= ts + interval '1' year + and ts < ts + interval '1' year + and ts >= ts - interval '1' year + and ts > ts - interval '1' year +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' <= ts + interval '1-2' year to month + and timestamp '2002-03-01 01:02:03' >= ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' != ts + interval '1-2' year to month + and timestamp '2002-02-01 01:02:03' < ts + interval '1-2' year to month + and timestamp '2002-04-01 01:02:03' > ts + interval '1-2' year to month + + and ts + interval '1-2' year to month = timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month >= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month <= timestamp '2002-03-01 01:02:03' + and ts + interval '1-2' year to month != timestamp '2002-04-01 01:02:03' + and ts + interval '1-2' year to month > timestamp '2002-02-01 01:02:03' + and ts + interval '1-2' year to month < timestamp '2002-04-01 01:02:03' + + and ts = ts + interval '0' year + and ts != ts + interval '1' year + and ts <= ts + interval '1' year + and ts < ts + interval '1' year + and ts >= ts - interval '1' year + and ts > ts - interval '1' year +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: -- day to second expressions in predicate +explain +select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' != dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' <= dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' < dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' >= dt - interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' > dt - interval '0 1:2:4' day to second + + and dt + interval '0 1:2:3' day to second = timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second != timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:3' day to second >= timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second > timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:3' day to second <= timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:4' day to second < timestamp '2001-01-01 01:02:03' + + and ts = dt + interval '0 1:2:3' day to second + and ts != dt + interval '0 1:2:4' day to second + and ts <= dt + interval '0 1:2:3' day to second + and ts < dt + interval '0 1:2:4' day to second + and ts >= dt - interval '0 1:2:3' day to second + and ts > dt - interval '0 1:2:4' day to second +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: -- day to second expressions in predicate +explain +select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to 
second + and timestamp '2001-01-01 01:02:03' != dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' <= dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' < dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' >= dt - interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' > dt - interval '0 1:2:4' day to second + + and dt + interval '0 1:2:3' day to second = timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second != timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:3' day to second >= timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second > timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:3' day to second <= timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:4' day to second < timestamp '2001-01-01 01:02:03' + + and ts = dt + interval '0 1:2:3' day to second + and ts != dt + interval '0 1:2:4' day to second + and ts <= dt + interval '0 1:2:3' day to second + and ts < dt + interval '0 1:2:4' day to second + and ts >= dt - interval '0 1:2:3' day to second + and ts > dt - interval '0 1:2:4' day to second +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((((((2001-01-01 01:02:03.0 = (dt + 0 01:02:03.000000000)) and (2001-01-01 01:02:03.0 <> (dt + 0 01:02:04.000000000))) and (2001-01-01 01:02:03.0 <= (dt + 0 01:02:03.000000000))) and (2001-01-01 01:02:03.0 < (dt + 0 01:02:04.000000000))) and (2001-01-01 01:02:03.0 >= (dt - 0 01:02:03.000000000))) and (2001-01-01 01:02:03.0 > (dt - 0 01:02:04.000000000))) and ((dt + 0 01:02:03.000000000) = 2001-01-01 01:02:03.0)) and ((dt + 0 01:02:04.000000000) <> 2001-01-01 01:02:03.0)) and ((dt + 0 01:02:03.000000000) >= 2001-01-01 01:02:03.0)) and ((dt + 0 01:02:04.000000000) > 2001-01-01 01:02:03.0)) and ((dt - 0 01:02:03.000000000) <= 2001-01-01 01:02:03.0)) and ((dt - 0 01:02:04.000000000) < 2001-01-01 01:02:03.0)) and (ts = (dt + 0 01:02:03.000000000))) and (ts <> (dt + 0 01:02:04.000000000))) and (ts <= (dt + 0 01:02:03.000000000))) and (ts < (dt + 0 01:02:04.000000000))) and (ts >= (dt - 0 01:02:03.000000000))) and (ts > (dt - 0 01:02:04.000000000))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution 
mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' != dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' <= dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' < dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' >= dt - interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' > dt - interval '0 1:2:4' day to second + + and dt + interval '0 1:2:3' day to second = timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second != timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:3' day to second >= timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second > timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:3' day to second <= timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:4' day to second < timestamp '2001-01-01 01:02:03' + + and ts = dt + interval '0 1:2:3' day to second + and ts != dt + interval '0 1:2:4' day to second + and ts <= dt + interval '0 1:2:3' day to second + and ts < dt + interval '0 1:2:4' day to second + and ts >= dt - interval '0 1:2:3' day to second + and ts > dt - interval '0 1:2:4' day to second +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' != dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' <= dt + interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' < dt + interval '0 1:2:4' day to second + and timestamp '2001-01-01 01:02:03' >= dt - interval '0 1:2:3' day to second + and timestamp '2001-01-01 01:02:03' > dt - interval '0 1:2:4' day to second + + and dt + interval '0 1:2:3' day to second = timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second != timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:3' day to second >= timestamp '2001-01-01 01:02:03' + and dt + interval '0 1:2:4' day to second > timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:3' day to second <= timestamp '2001-01-01 01:02:03' + and dt - interval '0 1:2:4' day to second < timestamp '2001-01-01 01:02:03' + + and ts = dt + interval '0 1:2:3' day to second + and ts != dt + interval '0 1:2:4' day to second + and ts <= dt + interval '0 1:2:3' day to second + and ts < dt + interval '0 1:2:4' day to second + and ts >= dt - interval '0 1:2:3' day to second + and ts > dt - interval '0 1:2:4' day to second +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: explain +select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = ts + interval '0' day + and timestamp '2001-01-01 01:02:03' != ts + interval '1' day + and timestamp '2001-01-01 01:02:03' <= ts + interval '1' day + and timestamp '2001-01-01 01:02:03' < ts + interval '1' day + and timestamp '2001-01-01 01:02:03' >= ts - interval '1' day + and timestamp '2001-01-01 01:02:03' > ts - interval '1' day + + and ts + interval '0' day = timestamp '2001-01-01 01:02:03' + and ts + interval '1' day != timestamp '2001-01-01 01:02:03' + and ts + interval '1' day >= timestamp 
'2001-01-01 01:02:03' + and ts + interval '1' day > timestamp '2001-01-01 01:02:03' + and ts - interval '1' day <= timestamp '2001-01-01 01:02:03' + and ts - interval '1' day < timestamp '2001-01-01 01:02:03' + + and ts = ts + interval '0' day + and ts != ts + interval '1' day + and ts <= ts + interval '1' day + and ts < ts + interval '1' day + and ts >= ts - interval '1' day + and ts > ts - interval '1' day +order by ts +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = ts + interval '0' day + and timestamp '2001-01-01 01:02:03' != ts + interval '1' day + and timestamp '2001-01-01 01:02:03' <= ts + interval '1' day + and timestamp '2001-01-01 01:02:03' < ts + interval '1' day + and timestamp '2001-01-01 01:02:03' >= ts - interval '1' day + and timestamp '2001-01-01 01:02:03' > ts - interval '1' day + + and ts + interval '0' day = timestamp '2001-01-01 01:02:03' + and ts + interval '1' day != timestamp '2001-01-01 01:02:03' + and ts + interval '1' day >= timestamp '2001-01-01 01:02:03' + and ts + interval '1' day > timestamp '2001-01-01 01:02:03' + and ts - interval '1' day <= timestamp '2001-01-01 01:02:03' + and ts - interval '1' day < timestamp '2001-01-01 01:02:03' + + and ts = ts + interval '0' day + and ts != ts + interval '1' day + and ts <= ts + interval '1' day + and ts < ts + interval '1' day + and ts >= ts - interval '1' day + and ts > ts - interval '1' day +order by ts +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_interval_2 + Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((((((((((((((2001-01-01 01:02:03.0 = (ts + 0 00:00:00.000000000)) and (2001-01-01 01:02:03.0 <> (ts + 1 00:00:00.000000000))) and (2001-01-01 01:02:03.0 <= (ts + 1 00:00:00.000000000))) and (2001-01-01 01:02:03.0 < (ts + 1 00:00:00.000000000))) and (2001-01-01 01:02:03.0 >= (ts - 1 00:00:00.000000000))) and (2001-01-01 01:02:03.0 > (ts - 1 00:00:00.000000000))) and ((ts + 0 00:00:00.000000000) = 2001-01-01 01:02:03.0)) and ((ts + 1 00:00:00.000000000) <> 2001-01-01 01:02:03.0)) and ((ts + 1 00:00:00.000000000) >= 2001-01-01 01:02:03.0)) and ((ts + 1 00:00:00.000000000) > 2001-01-01 01:02:03.0)) and ((ts - 1 00:00:00.000000000) <= 2001-01-01 01:02:03.0)) and ((ts - 1 00:00:00.000000000) < 2001-01-01 01:02:03.0)) and (ts = (ts + 0 00:00:00.000000000))) and (ts <> (ts + 1 00:00:00.000000000))) and (ts <= (ts + 1 00:00:00.000000000))) and (ts < (ts + 1 00:00:00.000000000))) and (ts >= (ts - 1 00:00:00.000000000))) and (ts > (ts - 1 00:00:00.000000000))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: 
false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = ts + interval '0' day + and timestamp '2001-01-01 01:02:03' != ts + interval '1' day + and timestamp '2001-01-01 01:02:03' <= ts + interval '1' day + and timestamp '2001-01-01 01:02:03' < ts + interval '1' day + and timestamp '2001-01-01 01:02:03' >= ts - interval '1' day + and timestamp '2001-01-01 01:02:03' > ts - interval '1' day + + and ts + interval '0' day = timestamp '2001-01-01 01:02:03' + and ts + interval '1' day != timestamp '2001-01-01 01:02:03' + and ts + interval '1' day >= timestamp '2001-01-01 01:02:03' + and ts + interval '1' day > timestamp '2001-01-01 01:02:03' + and ts - interval '1' day <= timestamp '2001-01-01 01:02:03' + and ts - interval '1' day < timestamp '2001-01-01 01:02:03' + + and ts = ts + interval '0' day + and ts != ts + interval '1' day + and ts <= ts + interval '1' day + and ts < ts + interval '1' day + and ts >= ts - interval '1' day + and ts > ts - interval '1' day +order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +POSTHOOK: query: select ts from vector_interval_2 +where + timestamp '2001-01-01 01:02:03' = ts + interval '0' day + and timestamp '2001-01-01 01:02:03' != ts + interval '1' day + and timestamp '2001-01-01 01:02:03' <= ts + interval '1' day + and timestamp '2001-01-01 01:02:03' < ts + interval '1' day + and timestamp '2001-01-01 01:02:03' >= ts - interval '1' day + and timestamp '2001-01-01 01:02:03' > ts - interval '1' day + + and ts + interval '0' day = timestamp '2001-01-01 01:02:03' + and ts + interval '1' day != timestamp '2001-01-01 01:02:03' + and ts + interval '1' day >= timestamp '2001-01-01 01:02:03' + and ts + interval '1' day > timestamp '2001-01-01 01:02:03' + and ts - interval '1' day <= timestamp '2001-01-01 01:02:03' + and ts - interval '1' day < timestamp '2001-01-01 01:02:03' + + and ts = ts + interval '0' day + and ts != ts + interval '1' day + and ts <= ts + interval '1' day + and ts < ts + interval '1' day + and ts >= ts - interval '1' day + and ts > ts - interval '1' day +order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_interval_2 +#### A masked pattern was here #### +2001-01-01 01:02:03 +PREHOOK: query: drop table vector_interval_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@vector_interval_2 +PREHOOK: Output: default@vector_interval_2 +POSTHOOK: query: drop table vector_interval_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@vector_interval_2 +POSTHOOK: Output: default@vector_interval_2 Index: ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out (revision 0) +++ ql/src/test/results/clientpositive/tez/vector_multi_insert.q.out (working copy) @@ -0,0 +1,225 @@ +PREHOOK: query: create table orc1 + stored as orc + tblproperties("orc.compress"="ZLIB") + as + select rn + from + ( + select * from (select cast(1 as int) as rn from src limit 1)a + union all + select * from (select cast(100 as int) as rn from 
src limit 1)b + union all + select * from (select cast(10000 as int) as rn from src limit 1)c + ) t +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@orc1 +POSTHOOK: query: create table orc1 + stored as orc + tblproperties("orc.compress"="ZLIB") + as + select rn + from + ( + select * from (select cast(1 as int) as rn from src limit 1)a + union all + select * from (select cast(100 as int) as rn from src limit 1)b + union all + select * from (select cast(10000 as int) as rn from src limit 1)c + ) t +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc1 +PREHOOK: query: create table orc_rn1 (rn int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_rn1 +POSTHOOK: query: create table orc_rn1 (rn int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_rn1 +PREHOOK: query: create table orc_rn2 (rn int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_rn2 +POSTHOOK: query: create table orc_rn2 (rn int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_rn2 +PREHOOK: query: create table orc_rn3 (rn int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_rn3 +POSTHOOK: query: create table orc_rn3 (rn int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_rn3 +PREHOOK: query: explain from orc1 a +insert overwrite table orc_rn1 select a.* where a.rn < 100 +insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 +insert overwrite table orc_rn3 select a.* where a.rn >= 1000 +PREHOOK: type: QUERY +POSTHOOK: query: explain from orc1 a +insert overwrite table orc_rn1 select a.* where a.rn < 100 +insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 +insert overwrite table orc_rn3 select a.* where a.rn >= 1000 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 154 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (rn < 100) (type: boolean) + Statistics: Num rows: 51 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rn (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 51 Data size: 204 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 51 Data size: 204 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn1 + Filter Operator + predicate: ((rn >= 100) and (rn < 1000)) (type: boolean) + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rn (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 17 
Data size: 68 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn2 + Filter Operator + predicate: (rn >= 1000) (type: boolean) + Statistics: Num rows: 51 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rn (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 51 Data size: 204 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 51 Data size: 204 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn3 + Execution mode: vectorized + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn1 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn2 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orc_rn3 + + Stage: Stage-7 + Stats-Aggr Operator + +PREHOOK: query: from orc1 a +insert overwrite table orc_rn1 select a.* where a.rn < 100 +insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 +insert overwrite table orc_rn3 select a.* where a.rn >= 1000 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc1 +PREHOOK: Output: default@orc_rn1 +PREHOOK: Output: default@orc_rn2 +PREHOOK: Output: default@orc_rn3 +POSTHOOK: query: from orc1 a +insert overwrite table orc_rn1 select a.* where a.rn < 100 +insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000 +insert overwrite table orc_rn3 select a.* where a.rn >= 1000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc1 +POSTHOOK: Output: default@orc_rn1 +POSTHOOK: Output: default@orc_rn2 +POSTHOOK: Output: default@orc_rn3 +POSTHOOK: Lineage: orc_rn1.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ] +POSTHOOK: Lineage: orc_rn2.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ] +POSTHOOK: Lineage: orc_rn3.rn SIMPLE [(orc1)a.FieldSchema(name:rn, type:int, comment:null), ] +PREHOOK: query: select * from orc_rn1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_rn1 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_rn1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_rn1 +#### A masked pattern was here #### +1 +PREHOOK: query: select * from orc_rn2 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_rn2 +#### A masked 
pattern was here #### +POSTHOOK: query: select * from orc_rn2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_rn2 +#### A masked pattern was here #### +100 +PREHOOK: query: select * from orc_rn3 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_rn3 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_rn3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_rn3 +#### A masked pattern was here #### +10000 Index: ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out =================================================================== --- ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out (working copy) @@ -94,52 +94,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: test_tab - Statistics: Num rows: 17 Data size: 1761 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '484') (type: boolean) - Statistics: Num rows: 8 Data size: 828 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), value (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 828 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 8 Data size: 828 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: key=484 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -182,14 +144,20 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.test_tab name: default.test_tab - Truncated Path -> Alias: - /test_tab/part=1/key=484 [test_tab] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: test_tab + Statistics: Num rows: 17 Data size: 1761 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '484') (type: boolean) + Statistics: Num rows: 8 Data size: 828 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), value (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 828 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: SELECT * FROM test_tab WHERE part = '1' AND key = '484' PREHOOK: type: QUERY @@ -233,52 +201,14 @@ STAGE DEPENDENCIES: - Stage-1 is a root stage - 
Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: test_tab - Statistics: Num rows: 17 Data size: 1761 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '0') (type: boolean) - Statistics: Num rows: 8 Data size: 828 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '0' (type: string), value (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 828 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 8 Data size: 828 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: Partition - base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -321,14 +251,20 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.test_tab name: default.test_tab - Truncated Path -> Alias: - /test_tab/part=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [test_tab] - - Stage: Stage-0 - Fetch Operator - limit: -1 Processor Tree: - ListSink + TableScan + alias: test_tab + Statistics: Num rows: 17 Data size: 1761 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '0') (type: boolean) + Statistics: Num rows: 8 Data size: 828 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '0' (type: string), value (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 828 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: SELECT * FROM test_tab WHERE part = '1' AND key = '0' PREHOOK: type: QUERY Index: ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out =================================================================== --- ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out (working copy) @@ -505,11 +505,11 @@ #### A masked pattern was here #### [26.0,255.5,479.0,491.0] PREHOOK: query: -- NaN -explain +explain select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket PREHOOK: type: QUERY POSTHOOK: query: -- NaN -explain +explain select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -566,11 +566,11 @@ #### A masked pattern was here #### 341.5 PREHOOK: query: -- with CBO 
-explain +explain select percentile_approx(key, 0.5) from bucket PREHOOK: type: QUERY POSTHOOK: query: -- with CBO -explain +explain select percentile_approx(key, 0.5) from bucket POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -617,12 +617,12 @@ Processor Tree: ListSink -PREHOOK: query: select percentile_approx(key, 0.5) from bucket +PREHOOK: query: select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket PREHOOK: type: QUERY PREHOOK: Input: default@bucket #### A masked pattern was here #### -POSTHOOK: query: select percentile_approx(key, 0.5) from bucket +POSTHOOK: query: select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket POSTHOOK: type: QUERY POSTHOOK: Input: default@bucket #### A masked pattern was here #### -255.5 +true Index: ql/src/test/results/clientpositive/udf_day.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_day.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/udf_day.q.out (working copy) @@ -2,14 +2,17 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION day POSTHOOK: type: DESCFUNCTION -day(date) - Returns the date of the month of date +day(param) - Returns the day of the month of date/timestamp, or day component of interval PREHOOK: query: DESCRIBE FUNCTION EXTENDED day PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED day POSTHOOK: type: DESCFUNCTION -day(date) - Returns the date of the month of date +day(param) - Returns the day of the month of date/timestamp, or day component of interval Synonyms: dayofmonth -date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. -Example: +param can be one of: +1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. +2. A date value +3. A timestamp value +4. A day-time interval valueExample: > SELECT day('2009-07-30') FROM src LIMIT 1; 30 Index: ql/src/test/results/clientpositive/udf_dayofmonth.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_dayofmonth.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/udf_dayofmonth.q.out (working copy) @@ -2,14 +2,17 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION dayofmonth POSTHOOK: type: DESCFUNCTION -dayofmonth(date) - Returns the date of the month of date +dayofmonth(param) - Returns the day of the month of date/timestamp, or day component of interval PREHOOK: query: DESCRIBE FUNCTION EXTENDED dayofmonth PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED dayofmonth POSTHOOK: type: DESCFUNCTION -dayofmonth(date) - Returns the date of the month of date +dayofmonth(param) - Returns the day of the month of date/timestamp, or day component of interval Synonyms: day -date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. -Example: +param can be one of: +1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. +2. A date value +3. A timestamp value +4. 
A day-time interval valueExample: > SELECT dayofmonth('2009-07-30') FROM src LIMIT 1; 30 Index: ql/src/test/results/clientpositive/udf_format_number.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_format_number.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/udf_format_number.q.out (working copy) @@ -211,3 +211,20 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### 12,332.1235 12,332.12 12,332.1000 -12,332 12,332.6000 +PREHOOK: query: -- nulls +SELECT + format_number(cast(null as int), 0), + format_number(12332.123456BD, cast(null as int)), + format_number(cast(null as int), cast(null as int)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: -- nulls +SELECT + format_number(cast(null as int), 0), + format_number(12332.123456BD, cast(null as int)), + format_number(cast(null as int), cast(null as int)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +NULL NULL NULL Index: ql/src/test/results/clientpositive/udf_hour.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_hour.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/udf_hour.q.out (working copy) @@ -2,14 +2,16 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION hour POSTHOOK: type: DESCFUNCTION -hour(date) - Returns the hour of date +hour(param) - Returns the hour componemnt of the string/timestamp/interval PREHOOK: query: DESCRIBE FUNCTION EXTENDED hour PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED hour POSTHOOK: type: DESCFUNCTION -hour(date) - Returns the hour of date -date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. -Example: +hour(param) - Returns the hour componemnt of the string/timestamp/interval +param can be one of: +1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. +2. A timestamp value +3. A day-time interval valueExample: > SELECT hour('2009-07-30 12:58:59') FROM src LIMIT 1; 12 > SELECT hour('12:58:59') FROM src LIMIT 1; Index: ql/src/test/results/clientpositive/udf_minute.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_minute.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/udf_minute.q.out (working copy) @@ -2,14 +2,16 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION minute POSTHOOK: type: DESCFUNCTION -minute(date) - Returns the minute of date +minute(param) - Returns the minute component of the string/timestamp/interval PREHOOK: query: DESCRIBE FUNCTION EXTENDED minute PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED minute POSTHOOK: type: DESCFUNCTION -minute(date) - Returns the minute of date -date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. -Example: +minute(param) - Returns the minute component of the string/timestamp/interval +param can be one of: +1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. +2. A timestamp value +3. 
A day-time interval valueExample: > SELECT minute('2009-07-30 12:58:59') FROM src LIMIT 1; 58 > SELECT minute('12:58:59') FROM src LIMIT 1; Index: ql/src/test/results/clientpositive/udf_month.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_month.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/udf_month.q.out (working copy) @@ -2,14 +2,16 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION minute POSTHOOK: type: DESCFUNCTION -minute(date) - Returns the minute of date +minute(param) - Returns the minute component of the string/timestamp/interval PREHOOK: query: DESCRIBE FUNCTION EXTENDED minute PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED minute POSTHOOK: type: DESCFUNCTION -minute(date) - Returns the minute of date -date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. -Example: +minute(param) - Returns the minute component of the string/timestamp/interval +param can be one of: +1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. +2. A timestamp value +3. A day-time interval valueExample: > SELECT minute('2009-07-30 12:58:59') FROM src LIMIT 1; 58 > SELECT minute('12:58:59') FROM src LIMIT 1; Index: ql/src/test/results/clientpositive/udf_months_between.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_months_between.q.out (revision 0) +++ ql/src/test/results/clientpositive/udf_months_between.q.out (working copy) @@ -0,0 +1,225 @@ +PREHOOK: query: describe function months_between +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: describe function months_between +POSTHOOK: type: DESCFUNCTION +months_between(date1, date2) - returns number of months between dates date1 and date2 +PREHOOK: query: desc function extended months_between +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: desc function extended months_between +POSTHOOK: type: DESCFUNCTION +months_between(date1, date2) - returns number of months between dates date1 and date2 +If date1 is later than date2, then the result is positive. If date1 is earlier than date2, then the result is negative. If date1 and date2 are either the same days of the month or both last days of months, then the result is always an integer. Otherwise the UDF calculates the fractional portion of the result based on a 31-day month and considers the difference in time components date1 and date2. +date1 and date2 type can be date, timestamp or string in the format 'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss'. The result is rounded to 8 decimal places. 
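As a rough sketch of the 31-day-month rule described above (assuming the Oracle-style convention the description appears to follow: whole months plus a day/time remainder divided by 31; this breakdown is an illustration, not part of the generated output), the Example value shown just below works out as:

    whole months from 1996-10-30 to 1997-02-28                =  4
    day-of-month difference: 28 - 30                          = -2 days
    time-of-day difference: 10:30:00 - 00:00:00 = 10.5 hours  = +0.4375 days
    fractional part: (-2 + 0.4375) / 31                       = -0.05040323
    result: 4 + (-0.05040323)                                 =  3.94959677 (rounded to 8 decimal places)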
+ Example: + > SELECT months_between('1997-02-28 10:30:00', '1996-10-30'); + 3.94959677 +PREHOOK: query: --test string format +explain select months_between('1995-02-02', '1995-01-01') +PREHOOK: type: QUERY +POSTHOOK: query: --test string format +explain select months_between('1995-02-02', '1995-01-01') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: 1.03225806 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + +PREHOOK: query: select + months_between('1995-02-02', '1995-01-01'), + months_between('2003-07-17', '2005-07-06'), + months_between('2001-06-30', '2000-05-31'), + months_between('2000-06-01', '2004-07-01'), + months_between('2002-02-28', '2002-03-01'), + months_between('2002-02-31', '2002-03-01'), + months_between('2012-02-29', '2012-03-01'), + months_between('2012-02-31', '2012-03-01'), + months_between('1976-01-01 00:00:00', '1975-12-31 23:59:59'), + months_between('1976-01-01', '1975-12-31 23:59:59'), + months_between('1997-02-28 10:30:00', '1996-10-30'), + -- if both are last day of the month then time part should be ignored + months_between('2002-03-31', '2002-02-28'), + months_between('2002-03-31', '2002-02-28 10:30:00'), + months_between('2002-03-31 10:30:00', '2002-02-28'), + -- if the same day of the month then time part should be ignored + months_between('2002-03-24', '2002-02-24'), + months_between('2002-03-24', '2002-02-24 10:30:00'), + months_between('2002-03-24 10:30:00', '2002-02-24'), + -- partial time. time part will be skipped + months_between('1995-02-02 10:39', '1995-01-01'), + months_between('1995-02-02', '1995-01-01 10:39'), + -- no leading 0 for month and day should work + months_between('1995-02-2', '1995-1-01'), + months_between('1995-2-02', '1995-01-1'), + -- short year should work + months_between('495-2-02', '495-01-1'), + months_between('95-2-02', '95-01-1'), + months_between('5-2-02', '5-01-1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select + months_between('1995-02-02', '1995-01-01'), + months_between('2003-07-17', '2005-07-06'), + months_between('2001-06-30', '2000-05-31'), + months_between('2000-06-01', '2004-07-01'), + months_between('2002-02-28', '2002-03-01'), + months_between('2002-02-31', '2002-03-01'), + months_between('2012-02-29', '2012-03-01'), + months_between('2012-02-31', '2012-03-01'), + months_between('1976-01-01 00:00:00', '1975-12-31 23:59:59'), + months_between('1976-01-01', '1975-12-31 23:59:59'), + months_between('1997-02-28 10:30:00', '1996-10-30'), + -- if both are last day of the month then time part should be ignored + months_between('2002-03-31', '2002-02-28'), + months_between('2002-03-31', '2002-02-28 10:30:00'), + months_between('2002-03-31 10:30:00', '2002-02-28'), + -- if the same day of the month then time part should be ignored + months_between('2002-03-24', '2002-02-24'), + months_between('2002-03-24', '2002-02-24 10:30:00'), + months_between('2002-03-24 10:30:00', '2002-02-24'), + -- partial time. 
time part will be skipped + months_between('1995-02-02 10:39', '1995-01-01'), + months_between('1995-02-02', '1995-01-01 10:39'), + -- no leading 0 for month and day should work + months_between('1995-02-2', '1995-1-01'), + months_between('1995-2-02', '1995-01-1'), + -- short year should work + months_between('495-2-02', '495-01-1'), + months_between('95-2-02', '95-01-1'), + months_between('5-2-02', '5-01-1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1.03225806 -23.64516129 13.0 -49.0 -0.12903226 0.06451613 -0.09677419 0.03225806 3.7E-7 3.7E-7 3.94959677 1.0 1.0 1.0 1.0 1.0 1.0 1.03225806 1.03225806 1.03225806 1.03225806 1.03225806 1.03225806 1.03225806 +PREHOOK: query: --test timestamp format +select + months_between(cast('1995-02-02 00:00:00' as timestamp), cast('1995-01-01 00:00:00' as timestamp)), + months_between(cast('2003-07-17 00:00:00' as timestamp), cast('2005-07-06 00:00:00' as timestamp)), + months_between(cast('2001-06-30 00:00:00' as timestamp), cast('2000-05-31 00:00:00' as timestamp)), + months_between(cast('2000-06-01 00:00:00' as timestamp), cast('2004-07-01 00:00:00' as timestamp)), + months_between(cast('2002-02-28 00:00:00' as timestamp), cast('2002-03-01 00:00:00' as timestamp)), + months_between(cast('2002-02-31 00:00:00' as timestamp), cast('2002-03-01 00:00:00' as timestamp)), + months_between(cast('2012-02-29 00:00:00' as timestamp), cast('2012-03-01 00:00:00' as timestamp)), + months_between(cast('2012-02-31 00:00:00' as timestamp), cast('2012-03-01 00:00:00' as timestamp)), + months_between(cast('1976-01-01 00:00:00' as timestamp), cast('1975-12-31 23:59:59' as timestamp)), + months_between(cast('1976-01-01' as date), cast('1975-12-31 23:59:59' as timestamp)), + months_between(cast('1997-02-28 10:30:00' as timestamp), cast('1996-10-30' as date)), + -- if both are last day of the month then time part should be ignored + months_between(cast('2002-03-31 00:00:00' as timestamp), cast('2002-02-28 00:00:00' as timestamp)), + months_between(cast('2002-03-31 00:00:00' as timestamp), cast('2002-02-28 10:30:00' as timestamp)), + months_between(cast('2002-03-31 10:30:00' as timestamp), cast('2002-02-28 00:00:00' as timestamp)), + -- if the same day of the month then time part should be ignored + months_between(cast('2002-03-24 00:00:00' as timestamp), cast('2002-02-24 00:00:00' as timestamp)), + months_between(cast('2002-03-24 00:00:00' as timestamp), cast('2002-02-24 10:30:00' as timestamp)), + months_between(cast('2002-03-24 10:30:00' as timestamp), cast('2002-02-24 00:00:00' as timestamp)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: --test timestamp format +select + months_between(cast('1995-02-02 00:00:00' as timestamp), cast('1995-01-01 00:00:00' as timestamp)), + months_between(cast('2003-07-17 00:00:00' as timestamp), cast('2005-07-06 00:00:00' as timestamp)), + months_between(cast('2001-06-30 00:00:00' as timestamp), cast('2000-05-31 00:00:00' as timestamp)), + months_between(cast('2000-06-01 00:00:00' as timestamp), cast('2004-07-01 00:00:00' as timestamp)), + months_between(cast('2002-02-28 00:00:00' as timestamp), cast('2002-03-01 00:00:00' as timestamp)), + months_between(cast('2002-02-31 00:00:00' as timestamp), cast('2002-03-01 00:00:00' as timestamp)), + months_between(cast('2012-02-29 00:00:00' as timestamp), cast('2012-03-01 00:00:00' as timestamp)), + months_between(cast('2012-02-31 00:00:00' as timestamp), 
cast('2012-03-01 00:00:00' as timestamp)), + months_between(cast('1976-01-01 00:00:00' as timestamp), cast('1975-12-31 23:59:59' as timestamp)), + months_between(cast('1976-01-01' as date), cast('1975-12-31 23:59:59' as timestamp)), + months_between(cast('1997-02-28 10:30:00' as timestamp), cast('1996-10-30' as date)), + -- if both are last day of the month then time part should be ignored + months_between(cast('2002-03-31 00:00:00' as timestamp), cast('2002-02-28 00:00:00' as timestamp)), + months_between(cast('2002-03-31 00:00:00' as timestamp), cast('2002-02-28 10:30:00' as timestamp)), + months_between(cast('2002-03-31 10:30:00' as timestamp), cast('2002-02-28 00:00:00' as timestamp)), + -- if the same day of the month then time part should be ignored + months_between(cast('2002-03-24 00:00:00' as timestamp), cast('2002-02-24 00:00:00' as timestamp)), + months_between(cast('2002-03-24 00:00:00' as timestamp), cast('2002-02-24 10:30:00' as timestamp)), + months_between(cast('2002-03-24 10:30:00' as timestamp), cast('2002-02-24 00:00:00' as timestamp)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1.03225806 -23.64516129 13.0 -49.0 -0.12903226 0.06451613 -0.09677419 0.03225806 3.7E-7 3.7E-7 3.94959677 1.0 1.0 1.0 1.0 1.0 1.0 +PREHOOK: query: --test date format +select + months_between(cast('1995-02-02' as date), cast('1995-01-01' as date)), + months_between(cast('2003-07-17' as date), cast('2005-07-06' as date)), + months_between(cast('2001-06-30' as date), cast('2000-05-31' as date)), + months_between(cast('2000-06-01' as date), cast('2004-07-01' as date)), + months_between(cast('2002-02-28' as date), cast('2002-03-01' as date)), + months_between(cast('2002-02-31' as date), cast('2002-03-01' as date)), + months_between(cast('2012-02-29' as date), cast('2012-03-01' as date)), + months_between(cast('2012-02-31' as date), cast('2012-03-01' as date)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: --test date format +select + months_between(cast('1995-02-02' as date), cast('1995-01-01' as date)), + months_between(cast('2003-07-17' as date), cast('2005-07-06' as date)), + months_between(cast('2001-06-30' as date), cast('2000-05-31' as date)), + months_between(cast('2000-06-01' as date), cast('2004-07-01' as date)), + months_between(cast('2002-02-28' as date), cast('2002-03-01' as date)), + months_between(cast('2002-02-31' as date), cast('2002-03-01' as date)), + months_between(cast('2012-02-29' as date), cast('2012-03-01' as date)), + months_between(cast('2012-02-31' as date), cast('2012-03-01' as date)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1.03225806 -23.64516129 13.0 -49.0 -0.12903226 0.06451613 -0.09677419 0.03225806 +PREHOOK: query: --test misc with null +select + months_between(cast(null as string), '2012-03-01'), + months_between('2012-02-31', cast(null as timestamp)), + months_between(cast(null as timestamp), cast(null as date)), + months_between(cast(null as string), cast('2012-03-01 00:00:00' as timestamp)), + months_between(cast('2012-02-31 00:00:00' as timestamp), cast(null as string)), + months_between(cast(null as timestamp), cast('2012-03-01' as string)), + months_between(cast('2012-02-31' as date), cast(null as string)), + months_between('2012-02-10', cast(null as string)), + months_between(cast(null as string), '2012-02-10'), + months_between(cast(null as string), 
cast(null as string)), + months_between('2012-02-10', cast(null as timestamp)), + months_between(cast(null as timestamp), '2012-02-10'), + months_between(cast(null as timestamp), cast(null as timestamp)), + -- string dates without day should be parsed to null + months_between('2012-03', '2012-02-24'), + months_between('2012-03-24', '2012-02') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: --test misc with null +select + months_between(cast(null as string), '2012-03-01'), + months_between('2012-02-31', cast(null as timestamp)), + months_between(cast(null as timestamp), cast(null as date)), + months_between(cast(null as string), cast('2012-03-01 00:00:00' as timestamp)), + months_between(cast('2012-02-31 00:00:00' as timestamp), cast(null as string)), + months_between(cast(null as timestamp), cast('2012-03-01' as string)), + months_between(cast('2012-02-31' as date), cast(null as string)), + months_between('2012-02-10', cast(null as string)), + months_between(cast(null as string), '2012-02-10'), + months_between(cast(null as string), cast(null as string)), + months_between('2012-02-10', cast(null as timestamp)), + months_between(cast(null as timestamp), '2012-02-10'), + months_between(cast(null as timestamp), cast(null as timestamp)), + -- string dates without day should be parsed to null + months_between('2012-03', '2012-02-24'), + months_between('2012-03-24', '2012-02') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL Index: ql/src/test/results/clientpositive/udf_reflect2.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_reflect2.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/udf_reflect2.q.out (working copy) @@ -320,17 +320,13 @@ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: UDFToInteger(key) (type: int), reflect2(UDFToInteger(key),'byteValue') (type: tinyint), reflect2(UDFToInteger(key),'shortValue') (type: smallint), reflect2(UDFToInteger(key),'intValue') (type: int), reflect2(UDFToInteger(key),'longValue') (type: bigint), reflect2(UDFToInteger(key),'floatValue') (type: float), reflect2(UDFToInteger(key),'doubleValue') (type: double), reflect2(UDFToInteger(key),'toString') (type: string), value (type: string), reflect2(value,'concat','_concat') (type: string), reflect2(value,'contains','86') (type: boolean), reflect2(value,'startsWith','v') (type: boolean), reflect2(value,'endsWith','6') (type: boolean), reflect2(value,'equals','val_86') (type: boolean), reflect2(value,'equalsIgnoreCase','VAL_86') (type: boolean), reflect2(value,'getBytes') (type: binary), reflect2(value,'indexOf','1') (type: int), reflect2(value,'lastIndexOf','1') (type: int), reflect2(value,'replace','val','VALUE') (type: string), reflect2(value,'substring',1) (type: string), reflect2(value,'substring',1,5) (type: string), reflect2(value,'toUpperCase') (type: string), reflect2(value,'trim') (type: string), 2013-02-15 19:41:20.0 (type: timestamp), 113 (type: int), 1 (type: int), 5 (type: int), 19 (type: int), 41 (type: int), 20 (type: int), 1360986080000 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 
_col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), reflect2(_col0,'byteValue') (type: tinyint), reflect2(_col0,'shortValue') (type: smallint), reflect2(_col0,'intValue') (type: int), reflect2(_col0,'longValue') (type: bigint), reflect2(_col0,'floatValue') (type: float), reflect2(_col0,'doubleValue') (type: double), reflect2(_col0,'toString') (type: string), _col1 (type: string), reflect2(_col1,'concat','_concat') (type: string), reflect2(_col1,'contains','86') (type: boolean), reflect2(_col1,'startsWith','v') (type: boolean), reflect2(_col1,'endsWith','6') (type: boolean), reflect2(_col1,'equals','val_86') (type: boolean), reflect2(_col1,'equalsIgnoreCase','VAL_86') (type: boolean), reflect2(_col1,'getBytes') (type: binary), reflect2(_col1,'indexOf','1') (type: int), reflect2(_col1,'lastIndexOf','1') (type: int), reflect2(_col1,'replace','val','VALUE') (type: string), reflect2(_col1,'substring',1) (type: string), reflect2(_col1,'substring',1,5) (type: string), reflect2(_col1,'toUpperCase') (type: string), reflect2(_col1,'trim') (type: string), 2013-02-15 19:41:20.0 (type: timestamp), 113 (type: int), 1 (type: int), 5 (type: int), 19 (type: int), 41 (type: int), 20 (type: int), 1360986080000 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - ListSink + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: SELECT key, reflect2(key, "byteValue"), Index: ql/src/test/results/clientpositive/udf_second.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_second.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/udf_second.q.out (working copy) @@ -2,14 +2,16 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION second POSTHOOK: type: DESCFUNCTION -second(date) - Returns the second of date +second(date) - Returns the second component of the string/timestamp/interval PREHOOK: query: DESCRIBE FUNCTION EXTENDED second PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED second POSTHOOK: type: DESCFUNCTION -second(date) - Returns the second of date -date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. -Example: +second(date) - Returns the second component of the string/timestamp/interval +param can be one of: +1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. +2. A timestamp value +3. 
A day-time interval valueExample: > SELECT second('2009-07-30 12:58:59') FROM src LIMIT 1; 59 > SELECT second('12:58:59') FROM src LIMIT 1; Index: ql/src/test/results/clientpositive/union24.q.out =================================================================== --- ql/src/test/results/clientpositive/union24.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/union24.q.out (working copy) @@ -262,7 +262,7 @@ name: default.src5 name: default.src5 Truncated Path -> Alias: - /src5 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_0:src5] + /src5 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:src5] Needs Tagging: false Reduce Operator Tree: Group By Operator Index: ql/src/test/results/clientpositive/unionDistinct_1.q.out =================================================================== --- ql/src/test/results/clientpositive/unionDistinct_1.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/unionDistinct_1.q.out (working copy) @@ -9089,7 +9089,7 @@ name: default.src5 name: default.src5 Truncated Path -> Alias: - /src5 [$hdt$_0-subquery2:$hdt$_0-subquery2:$hdt$_0:$hdt$_0:src5] + /src5 [$hdt$_0-subquery2:$hdt$_0-subquery2:$hdt$_0:src5] Needs Tagging: false Reduce Operator Tree: Group By Operator Index: ql/src/test/results/clientpositive/union_view.q.out =================================================================== --- ql/src/test/results/clientpositive/union_view.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/union_view.q.out (working copy) @@ -44,181 +44,67 @@ POSTHOOK: Input: default@src_union_3 POSTHOOK: Output: default@default__src_union_3_src_union_3_key_idx__ STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: default.default__src_union_1_src_union_1_key_idx__ - filterExpr: ((key = 86) and (ds = '1')) (type: boolean) - Filter Operator - predicate: (key = 86) (type: boolean) - Select Operator - expressions: _bucketname (type: string), _offsets (type: array) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src_union_1 - filterExpr: ((key = 86) and (ds = '1')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key = 86) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 86 (type: int), value (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: src_union_1 + 
filterExpr: ((key = 86) and (ds = '1')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 86 (type: int), value (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + ListSink STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: default.default__src_union_2_src_union_2_key_idx__ - filterExpr: ((key = 86) and (ds = '2')) (type: boolean) - Filter Operator - predicate: (key = 86) (type: boolean) - Select Operator - expressions: _bucketname (type: string), _offsets (type: array) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src_union_2 - filterExpr: ((key = 86) and (ds = '2')) (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key = 86) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 86 (type: int), value (type: string), '2' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: src_union_2 + filterExpr: ((key = 86) and (ds = '2')) (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 86 (type: int), value (type: string), '2' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + ListSink STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: default.default__src_union_3_src_union_3_key_idx__ - filterExpr: ((key = 86) and (ds = '3')) (type: boolean) - Filter Operator - predicate: (key = 86) (type: boolean) - Select Operator - expressions: _bucketname (type: string), _offsets (type: array) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: 
- input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-2 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src_union_3 - filterExpr: ((key = 86) and (ds = '3')) (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key = 86) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 86 (type: int), value (type: string), '3' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: src_union_3 + filterExpr: ((key = 86) and (ds = '3')) (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 86 (type: int), value (type: string), '3' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + ListSink 86 val_86 1 86 val_86 2 @@ -236,7 +122,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_1_src_union_1_key_idx__ + alias: default__src_union_1_src_union_1_key_idx__ filterExpr: (ds = '1') (type: boolean) Select Operator expressions: _bucketname (type: string), _offsets (type: array) @@ -303,7 +189,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_2_src_union_2_key_idx__ + alias: default__src_union_2_src_union_2_key_idx__ filterExpr: (ds = '2') (type: boolean) Select Operator expressions: _bucketname (type: string), _offsets (type: array) @@ -370,7 +256,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_3_src_union_3_key_idx__ + alias: default__src_union_3_src_union_3_key_idx__ filterExpr: (ds = '3') (type: boolean) Select Operator expressions: _bucketname (type: string), _offsets (type: array) @@ -440,7 +326,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_1_src_union_1_key_idx__ + alias: default__src_union_1_src_union_1_key_idx__ filterExpr: ((key = 86) and (ds = '1')) (type: boolean) Filter Operator predicate: (key = 86) (type: boolean) @@ -505,7 +391,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_2_src_union_2_key_idx__ + alias: default__src_union_2_src_union_2_key_idx__ filterExpr: ((key = 86) and (ds = '2')) (type: boolean) Filter Operator predicate: (key = 86) (type: boolean) @@ -570,7 +456,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_3_src_union_3_key_idx__ + alias: default__src_union_3_src_union_3_key_idx__ filterExpr: ((key = 86) and (ds = '3')) (type: boolean) Filter Operator predicate: (key 
= 86) (type: boolean) @@ -639,7 +525,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_1_src_union_1_key_idx__ + alias: default__src_union_1_src_union_1_key_idx__ filterExpr: (key = 86) (type: boolean) Filter Operator predicate: (key = 86) (type: boolean) @@ -745,7 +631,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_2_src_union_2_key_idx__ + alias: default__src_union_2_src_union_2_key_idx__ filterExpr: (key = 86) (type: boolean) Filter Operator predicate: (key = 86) (type: boolean) @@ -769,7 +655,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_3_src_union_3_key_idx__ + alias: default__src_union_3_src_union_3_key_idx__ filterExpr: (key = 86) (type: boolean) Filter Operator predicate: (key = 86) (type: boolean) @@ -816,7 +702,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_1_src_union_1_key_idx__ + alias: default__src_union_1_src_union_1_key_idx__ filterExpr: (ds = '1') (type: boolean) Select Operator expressions: _bucketname (type: string), _offsets (type: array) @@ -887,7 +773,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_2_src_union_2_key_idx__ + alias: default__src_union_2_src_union_2_key_idx__ filterExpr: (ds = '2') (type: boolean) Select Operator expressions: _bucketname (type: string), _offsets (type: array) @@ -958,7 +844,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_3_src_union_3_key_idx__ + alias: default__src_union_3_src_union_3_key_idx__ filterExpr: (ds = '3') (type: boolean) Select Operator expressions: _bucketname (type: string), _offsets (type: array) @@ -1032,7 +918,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_3_src_union_3_key_idx__ + alias: default__src_union_3_src_union_3_key_idx__ filterExpr: ((key = 86) and (ds = '4')) (type: boolean) Filter Operator predicate: (key = 86) (type: boolean) @@ -1098,7 +984,7 @@ Map Reduce Map Operator Tree: TableScan - alias: default.default__src_union_3_src_union_3_key_idx__ + alias: default__src_union_3_src_union_3_key_idx__ filterExpr: (ds = '4') (type: boolean) Select Operator expressions: _bucketname (type: string), _offsets (type: array) Index: ql/src/test/results/clientpositive/update_all_types.q.out =================================================================== --- ql/src/test/results/clientpositive/update_all_types.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/update_all_types.q.out (working copy) @@ -1,6 +1,7 @@ PREHOOK: query: create table acid_uat(ti tinyint, si smallint, i int, + j int, bi bigint, f float, d double, @@ -17,6 +18,7 @@ POSTHOOK: query: create table acid_uat(ti tinyint, si smallint, i int, + j int, bi bigint, f float, d double, @@ -34,6 +36,7 @@ select ctinyint, csmallint, cint, + cint j, cbigint, cfloat, cdouble, @@ -52,6 +55,7 @@ select ctinyint, csmallint, cint, + cint j, cbigint, cfloat, cdouble, @@ -74,6 +78,7 @@ POSTHOOK: Lineage: acid_uat.dt EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: acid_uat.f SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] POSTHOOK: Lineage: acid_uat.i SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: acid_uat.j SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: acid_uat.s SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] POSTHOOK: Lineage: acid_uat.si SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] POSTHOOK: Lineage: acid_uat.t SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] @@ -87,20 +92,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_uat #### A masked pattern was here #### -11 NULL -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true -NULL -7382 -1073051226 -1887561756 NULL -7382.0 NULL NULL 1970-01-01 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false -11 NULL -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false -NULL 8373 -1072081801 1864027286 NULL 8373.0 NULL NULL 1970-01-01 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true -NULL -5470 -1072076362 1864027286 NULL -5470.0 NULL NULL 1970-01-01 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true --51 NULL -1071480828 -1401575336 -51.0 NULL -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true -8 NULL -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true -NULL -741 -1070883071 -1645852809 NULL -741.0 NULL NULL 1970-01-01 0ruyd6Y50JpdGRf6HqD 0ruyd6Y50JpdGRf6HqD xH7445Rals48VOulSyR5F false -NULL -947 -1070551679 1864027286 NULL -947.0 NULL NULL 1970-01-01 iUR3Q iUR3Q 4KWs6gw7lv2WYd66P false -11 NULL -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true +11 NULL -1073279343 -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true +NULL -7382 -1073051226 -1073051226 -1887561756 NULL -7382.0 NULL NULL 1969-12-31 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false +11 NULL -1072910839 -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false +NULL 8373 -1072081801 -1072081801 1864027286 NULL 8373.0 NULL NULL 1969-12-31 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true +NULL -5470 -1072076362 -1072076362 1864027286 NULL -5470.0 NULL NULL 1969-12-31 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true +-51 NULL -1071480828 -1071480828 -1401575336 -51.0 NULL -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true +8 NULL -1071363017 -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true +NULL -741 -1070883071 -1070883071 -1645852809 NULL -741.0 NULL NULL 1969-12-31 0ruyd6Y50JpdGRf6HqD 0ruyd6Y50JpdGRf6HqD xH7445Rals48VOulSyR5F false +NULL -947 -1070551679 -1070551679 1864027286 NULL -947.0 NULL NULL 1969-12-31 iUR3Q iUR3Q 4KWs6gw7lv2WYd66P false +11 NULL -1069736047 -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true PREHOOK: query: update acid_uat set ti = 1, si = 2, - i = 3, + j = 3, bi = 4, f = 3.14, d = 6.28, @@ -118,7 +123,7 @@ POSTHOOK: query: update acid_uat set ti = 1, si = 2, - i = 3, + j = 3, bi = 4, f = 3.14, d = 6.28, @@ -141,16 +146,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_uat #### A masked pattern was here #### -11 NULL -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true -NULL -7382 -1073051226 -1887561756 NULL -7382.0 NULL NULL 
1970-01-01 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false -11 NULL -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false -NULL 8373 -1072081801 1864027286 NULL 8373.0 NULL NULL 1970-01-01 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true -NULL -5470 -1072076362 1864027286 NULL -5470.0 NULL NULL 1970-01-01 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true --51 NULL -1071480828 -1401575336 -51.0 NULL -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true -8 NULL -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true -NULL -947 -1070551679 1864027286 NULL -947.0 NULL NULL 1970-01-01 iUR3Q iUR3Q 4KWs6gw7lv2WYd66P false -11 NULL -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true -1 2 3 4 3.14 6.28 5.99 NULL 2014-09-01 its a beautiful day in the neighbhorhood a beautiful day for a neighbor wont you be mine true +11 NULL -1073279343 -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true +NULL -7382 -1073051226 -1073051226 -1887561756 NULL -7382.0 NULL NULL 1969-12-31 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false +11 NULL -1072910839 -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false +NULL 8373 -1072081801 -1072081801 1864027286 NULL 8373.0 NULL NULL 1969-12-31 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true +NULL -5470 -1072076362 -1072076362 1864027286 NULL -5470.0 NULL NULL 1969-12-31 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true +-51 NULL -1071480828 -1071480828 -1401575336 -51.0 NULL -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true +8 NULL -1071363017 -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true +1 2 -1070883071 3 4 3.14 6.28 5.99 NULL 2014-09-01 its a beautiful day in the neighbhorhood a beautiful day for a neighbor wont you be mine true +NULL -947 -1070551679 -1070551679 1864027286 NULL -947.0 NULL NULL 1969-12-31 iUR3Q iUR3Q 4KWs6gw7lv2WYd66P false +11 NULL -1069736047 -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true PREHOOK: query: update acid_uat set ti = ti * 2, si = cast(f as int), @@ -175,13 +180,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_uat #### A masked pattern was here #### -11 NULL -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true -NULL -7382 -1073051226 -1887561756 NULL -7382.0 NULL NULL 1970-01-01 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false -11 NULL -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false -NULL 8373 -1072081801 1864027286 NULL 8373.0 NULL NULL 1970-01-01 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true -NULL -5470 -1072076362 1864027286 NULL -5470.0 NULL NULL 1970-01-01 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true --102 -51 -1071480828 -1401575336 -51.0 -51.0 -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true -8 NULL -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true -NULL -947 -1070551679 1864027286 NULL -947.0 NULL NULL 1970-01-01 iUR3Q iUR3Q 
4KWs6gw7lv2WYd66P false -11 NULL -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true -1 2 3 4 3.14 6.28 5.99 NULL 2014-09-01 its a beautiful day in the neighbhorhood a beautiful day for a neighbor wont you be mine true +11 NULL -1073279343 -1073279343 -1595604468 11.0 NULL 11 1969-12-31 16:00:02.351 NULL oj1YrV5Wa oj1YrV5Wa P76636jJ6qM17d7DIy true +NULL -7382 -1073051226 -1073051226 -1887561756 NULL -7382.0 NULL NULL 1969-12-31 A34p7oRr2WvUJNf A34p7oRr2WvUJNf 4hA4KQj2vD3fI6gX82220d false +11 NULL -1072910839 -1072910839 2048385991 11.0 NULL 11 1969-12-31 16:00:02.351 NULL 0iqrc5 0iqrc5 KbaDXiN85adbHRx58v false +NULL 8373 -1072081801 -1072081801 1864027286 NULL 8373.0 NULL NULL 1969-12-31 dPkN74F7 dPkN74F7 4KWs6gw7lv2WYd66P true +NULL -5470 -1072076362 -1072076362 1864027286 NULL -5470.0 NULL NULL 1969-12-31 2uLyD28144vklju213J1mr 2uLyD28144vklju213J1mr 4KWs6gw7lv2WYd66P true +-102 -51 -1071480828 -1071480828 -1401575336 -51.0 -51.0 -51 1969-12-31 16:00:08.451 NULL aw724t8c5558x2xneC624 aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA true +8 NULL -1071363017 -1071363017 1349676361 8.0 NULL 8 1969-12-31 16:00:15.892 NULL Anj0oF Anj0oF IwE1G7Qb0B1NEfV030g true +1 2 -1070883071 3 4 3.14 6.28 5.99 NULL 2014-09-01 its a beautiful day in the neighbhorhood a beautiful day for a neighbor wont you be mine true +NULL -947 -1070551679 -1070551679 1864027286 NULL -947.0 NULL NULL 1969-12-31 iUR3Q iUR3Q 4KWs6gw7lv2WYd66P false +11 NULL -1069736047 -1069736047 -453772520 11.0 NULL 11 1969-12-31 16:00:02.351 NULL k17Am8uPHWk02cEf1jet k17Am8uPHWk02cEf1jet qrXLLNX1 true Index: ql/src/test/results/clientpositive/update_tmp_table.q.out =================================================================== --- ql/src/test/results/clientpositive/update_tmp_table.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/update_tmp_table.q.out (working copy) @@ -34,11 +34,11 @@ -1070883071 0ruyd6Y50JpdGRf6HqD -1070551679 iUR3Q -1069736047 k17Am8uPHWk02cEf1jet -PREHOOK: query: update acid_utt set b = 'fred' where b = '0ruyd6Y50JpdGRf6HqD' +PREHOOK: query: update acid_utt set a = 'fred' where b = '0ruyd6Y50JpdGRf6HqD' PREHOOK: type: QUERY PREHOOK: Input: default@acid_utt PREHOOK: Output: default@acid_utt -POSTHOOK: query: update acid_utt set b = 'fred' where b = '0ruyd6Y50JpdGRf6HqD' +POSTHOOK: query: update acid_utt set a = 'fred' where b = '0ruyd6Y50JpdGRf6HqD' POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_utt POSTHOOK: Output: default@acid_utt @@ -50,6 +50,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_utt #### A masked pattern was here #### +NULL 0ruyd6Y50JpdGRf6HqD -1073279343 oj1YrV5Wa -1073051226 A34p7oRr2WvUJNf -1072910839 0iqrc5 @@ -57,6 +58,5 @@ -1072076362 2uLyD28144vklju213J1mr -1071480828 aw724t8c5558x2xneC624 -1071363017 Anj0oF --1070883071 fred -1070551679 iUR3Q -1069736047 k17Am8uPHWk02cEf1jet Index: ql/src/test/results/clientpositive/vector_between_in.q.out =================================================================== --- ql/src/test/results/clientpositive/vector_between_in.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/vector_between_in.q.out (working copy) @@ -439,40 +439,40 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 
+1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 +1969-12-31 1970-01-01 1970-01-01 1970-01-01 1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 -1970-01-01 PREHOOK: query: SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate PREHOOK: type: QUERY PREHOOK: Input: default@decimal_date_test @@ -481,2949 +481,2949 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_date_test #### A masked pattern was here #### -1678-01-21 -1678-01-24 -1678-02-04 -1678-03-20 -1678-04-12 -1678-08-17 -1678-11-01 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-06-06 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-08-14 -1680-10-13 -1680-12-14 -1681-02-27 -1681-06-10 -1681-10-21 -1682-03-29 +1678-01-20 +1678-01-23 +1678-02-03 +1678-03-19 +1678-04-11 +1678-08-16 +1678-10-31 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-06-05 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-08-13 +1680-10-12 +1680-12-13 +1681-02-26 +1681-06-09 +1681-10-20 +1682-03-28 +1682-05-05 1682-05-06 -1682-05-07 -1682-11-12 -1682-11-12 -1682-11-12 -1682-11-12 -1682-11-12 -1682-11-12 -1683-03-27 -1683-06-08 -1683-06-08 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-06-11 -1683-07-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1683-08-18 -1684-06-28 -1684-07-23 -1684-09-10 -1684-09-12 -1684-11-13 -1684-11-22 -1685-01-28 -1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-04-10 -1685-05-26 -1685-06-21 -1685-08-02 -1685-12-09 -1686-02-01 +1682-11-11 +1682-11-11 +1682-11-11 +1682-11-11 +1682-11-11 +1682-11-11 +1683-03-26 +1683-06-07 +1683-06-07 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-06-10 +1683-07-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1683-08-17 +1684-06-27 +1684-07-22 +1684-09-09 +1684-09-11 +1684-11-12 +1684-11-21 +1685-01-27 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-04-09 +1685-05-25 +1685-06-20 +1685-08-01 +1685-12-08 +1686-01-31 +1686-03-20 1686-03-21 -1686-03-22 -1686-04-02 -1686-11-30 -1686-12-03 -1686-12-03 -1686-12-03 -1686-12-03 -1686-12-03 -1686-12-03 -1687-02-02 -1687-03-05 -1687-03-24 -1687-05-24 -1687-07-16 -1687-09-29 -1687-10-25 -1687-11-07 -1687-11-19 -1687-12-17 -1688-07-24 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-07-26 -1688-08-03 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-08-28 -1688-12-30 -1689-01-31 -1689-09-24 -1689-10-30 -1690-01-28 -1690-03-13 -1690-03-20 -1690-05-28 -1690-09-21 -1690-10-01 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1690-11-26 -1691-02-10 -1691-07-18 -1691-08-09 -1691-11-14 -1691-11-14 -1691-11-14 
-1691-11-14 -1691-11-14 -1691-11-14 -1691-11-14 -1692-05-13 -1692-11-14 -1693-03-18 -1693-07-16 -1693-12-17 -1693-12-27 -1694-06-30 -1694-08-05 -1694-11-18 -1694-11-18 -1694-11-18 -1695-05-20 -1695-10-19 -1695-11-04 -1695-11-11 -1696-02-17 -1696-02-27 -1696-03-24 -1696-04-09 -1696-05-03 -1696-05-18 -1696-05-28 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-11 -1696-09-26 -1696-11-04 -1696-12-17 -1697-01-25 -1697-05-02 -1697-05-28 -1697-08-30 -1697-10-13 -1697-12-10 -1698-03-02 -1698-04-29 -1698-05-06 -1698-05-26 -1698-05-26 -1698-06-12 -1698-08-09 -1698-11-25 -1698-12-10 -1698-12-10 -1698-12-10 -1699-03-10 -1699-03-14 -1699-04-18 -1699-09-28 -1700-02-27 -1700-04-11 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-04-20 -1700-05-01 -1700-05-18 -1700-05-18 -1700-05-18 -1700-05-18 -1700-05-18 -1700-06-24 -1700-08-09 -1700-08-14 -1700-09-23 -1700-11-20 -1700-11-24 -1701-02-18 -1701-03-21 -1701-05-30 -1701-07-24 +1686-04-01 +1686-11-29 +1686-12-02 +1686-12-02 +1686-12-02 +1686-12-02 +1686-12-02 +1686-12-02 +1687-02-01 +1687-03-04 +1687-03-23 +1687-05-23 +1687-07-15 +1687-09-28 +1687-10-24 +1687-11-06 +1687-11-18 +1687-12-16 +1688-07-23 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-07-25 +1688-08-02 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-08-27 +1688-12-29 +1689-01-30 +1689-09-23 +1689-10-29 +1690-01-27 +1690-03-12 +1690-03-19 +1690-05-27 +1690-09-20 +1690-09-30 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1690-11-25 +1691-02-09 +1691-07-17 +1691-08-08 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1691-11-13 +1692-05-12 +1692-11-13 +1693-03-17 +1693-07-15 +1693-12-16 +1693-12-26 +1694-06-29 +1694-08-04 +1694-11-17 +1694-11-17 +1694-11-17 +1695-05-19 +1695-10-18 +1695-11-03 +1695-11-10 +1696-02-16 +1696-02-26 +1696-03-23 +1696-04-08 +1696-05-02 +1696-05-17 +1696-05-27 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-10 +1696-09-25 +1696-11-03 +1696-12-16 +1697-01-24 +1697-05-01 +1697-05-27 +1697-08-29 +1697-10-12 +1697-12-09 +1698-03-01 +1698-04-28 +1698-05-05 +1698-05-25 +1698-05-25 +1698-06-11 +1698-08-08 +1698-11-24 +1698-12-09 +1698-12-09 +1698-12-09 +1699-03-09 +1699-03-13 +1699-04-17 +1699-09-27 +1700-02-26 +1700-04-10 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-19 +1700-04-30 +1700-05-17 +1700-05-17 +1700-05-17 +1700-05-17 +1700-05-17 +1700-06-23 +1700-08-08 +1700-08-13 +1700-09-22 +1700-11-19 +1700-11-23 +1701-02-17 +1701-03-20 +1701-05-29 +1701-07-23 +1701-07-29 1701-07-30 -1701-07-31 -1701-09-19 -1701-10-25 -1701-11-03 -1702-01-09 -1702-01-09 -1702-01-09 -1702-01-09 -1702-05-16 -1702-06-04 -1702-07-24 -1702-10-04 -1703-01-30 -1703-02-13 -1703-02-21 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-04-24 -1703-06-16 -1703-08-18 +1701-09-18 +1701-10-24 +1701-11-02 +1702-01-08 +1702-01-08 +1702-01-08 +1702-01-08 +1702-05-15 +1702-06-03 +1702-07-23 +1702-10-03 +1703-01-29 +1703-02-12 +1703-02-20 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 
+1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-04-23 +1703-06-15 +1703-08-17 +1703-09-02 +1703-09-02 +1703-09-02 +1703-09-02 +1703-09-02 1703-09-03 -1703-09-03 -1703-09-03 -1703-09-03 -1703-09-03 -1703-09-04 -1703-09-20 -1703-10-27 -1704-01-23 -1704-08-07 -1704-08-15 -1704-08-20 -1704-09-26 -1704-09-26 -1704-09-26 -1704-09-26 -1704-09-26 -1704-11-24 -1704-12-22 -1705-02-23 -1705-04-18 -1705-04-26 -1705-04-26 -1705-04-26 -1705-04-26 -1705-06-09 -1705-08-06 -1705-12-04 -1706-01-11 -1706-02-13 -1706-06-11 -1706-06-11 -1706-06-21 -1706-06-23 -1706-06-25 -1706-07-13 -1706-07-24 -1706-08-08 -1706-08-11 -1706-09-01 -1706-09-25 -1706-11-15 -1706-12-01 +1703-09-19 +1703-10-26 +1704-01-22 +1704-08-06 +1704-08-14 +1704-08-19 +1704-09-25 +1704-09-25 +1704-09-25 +1704-09-25 +1704-09-25 +1704-11-23 +1704-12-21 +1705-02-22 +1705-04-17 +1705-04-25 +1705-04-25 +1705-04-25 +1705-04-25 +1705-06-08 +1705-08-05 +1705-12-03 +1706-01-10 +1706-02-12 +1706-06-10 +1706-06-10 +1706-06-20 +1706-06-22 +1706-06-24 +1706-07-12 +1706-07-23 +1706-08-07 +1706-08-10 +1706-08-31 +1706-09-24 +1706-11-14 +1706-11-30 +1706-12-23 1706-12-24 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1706-12-25 -1707-03-05 -1707-04-17 -1707-05-06 -1707-11-23 -1708-02-13 -1708-04-14 -1708-06-25 -1708-07-22 -1708-08-06 -1708-09-22 -1708-10-16 -1708-11-05 -1708-12-31 -1709-03-02 -1709-04-22 -1709-05-15 -1709-09-16 -1710-01-08 -1710-04-30 -1710-05-29 -1710-06-22 -1710-08-02 -1710-09-09 -1710-09-30 -1710-11-25 -1711-01-13 -1711-05-11 -1711-05-24 -1711-08-05 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-09-27 -1711-10-20 -1711-12-05 -1712-02-02 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1706-12-24 +1707-03-04 +1707-04-16 +1707-05-05 +1707-11-22 +1708-02-12 +1708-04-13 +1708-06-24 +1708-07-21 +1708-08-05 +1708-09-21 +1708-10-15 +1708-11-04 +1708-12-30 +1709-03-01 +1709-04-21 +1709-05-14 +1709-09-15 +1710-01-07 +1710-04-29 +1710-05-28 +1710-06-21 +1710-08-01 +1710-09-08 +1710-09-29 +1710-11-24 +1711-01-12 +1711-05-10 +1711-05-23 +1711-08-04 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-09-26 +1711-10-19 +1711-12-04 +1712-02-01 +1712-03-22 1712-03-23 -1712-03-24 -1712-03-26 -1712-05-14 -1712-10-10 -1712-10-10 -1712-10-10 -1712-10-10 -1712-10-10 -1712-12-19 -1713-02-24 -1713-06-02 -1713-06-22 -1713-07-06 -1713-08-19 -1713-08-24 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-01 -1714-10-27 -1714-12-17 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-01-08 -1715-03-05 -1715-03-09 -1715-06-22 -1715-07-25 -1715-09-20 -1715-11-10 -1716-05-30 -1716-06-03 -1716-06-07 -1716-07-19 +1712-03-25 +1712-05-13 +1712-10-09 +1712-10-09 +1712-10-09 +1712-10-09 +1712-10-09 +1712-12-18 +1713-02-23 +1713-06-01 +1713-06-21 +1713-07-05 +1713-08-18 +1713-08-23 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-09-30 +1714-10-26 +1714-12-16 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-01-07 +1715-03-04 +1715-03-08 +1715-06-21 +1715-07-24 +1715-09-19 +1715-11-09 +1716-05-29 +1716-06-02 +1716-06-06 +1716-07-18 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 +1717-02-14 1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 
-1717-02-15 -1717-02-15 -1717-02-15 -1717-02-15 -1717-02-16 -1717-02-18 -1717-07-30 -1717-10-18 -1717-11-06 -1717-12-01 -1717-12-15 -1717-12-26 -1717-12-31 -1718-02-08 -1718-02-25 -1718-03-12 -1718-03-28 -1718-09-09 -1718-10-10 -1718-10-10 -1718-10-10 -1718-10-10 -1718-10-10 -1718-10-10 -1718-11-12 -1718-11-15 -1718-12-29 -1719-04-17 -1719-04-22 -1719-05-16 -1719-08-24 -1719-09-22 -1719-12-09 -1719-12-26 -1720-03-02 -1720-03-02 -1720-03-02 -1720-03-02 -1720-03-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-06-02 -1720-09-05 -1720-10-24 -1720-10-24 -1720-10-24 -1720-10-24 -1720-10-24 -1721-01-23 -1721-03-13 -1721-04-30 -1721-05-14 -1721-06-07 -1721-06-19 -1721-07-12 -1721-08-18 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-01-25 -1722-02-10 +1717-02-17 +1717-07-29 +1717-10-17 +1717-11-05 +1717-11-30 +1717-12-14 +1717-12-25 +1717-12-30 +1718-02-07 +1718-02-24 +1718-03-11 +1718-03-27 +1718-09-08 +1718-10-09 +1718-10-09 +1718-10-09 +1718-10-09 +1718-10-09 +1718-10-09 +1718-11-11 +1718-11-14 +1718-12-28 +1719-04-16 +1719-04-21 +1719-05-15 +1719-08-23 +1719-09-21 +1719-12-08 +1719-12-25 +1720-03-01 +1720-03-01 +1720-03-01 +1720-03-01 +1720-03-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-06-01 +1720-09-04 +1720-10-23 +1720-10-23 +1720-10-23 +1720-10-23 +1720-10-23 +1721-01-22 +1721-03-12 +1721-04-29 +1721-05-13 +1721-06-06 +1721-06-18 +1721-07-11 +1721-08-17 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-01-24 +1722-02-09 +1722-02-24 1722-02-25 -1722-02-26 -1722-06-14 -1722-07-15 -1722-10-01 -1722-12-01 -1722-12-14 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-01-22 -1723-03-04 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-03-06 -1723-05-19 -1723-06-21 -1723-07-24 -1723-08-02 -1723-10-30 -1723-12-01 -1724-01-26 +1722-06-13 +1722-07-14 +1722-09-30 +1722-11-30 +1722-12-13 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-01-21 +1723-03-03 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-03-05 +1723-05-18 +1723-06-20 +1723-07-23 +1723-08-01 +1723-10-29 +1723-11-30 +1724-01-25 +1724-03-26 1724-03-27 -1724-03-28 -1724-04-15 -1724-05-20 -1724-07-11 -1724-08-23 -1724-10-09 -1724-12-07 -1725-03-19 -1725-06-01 -1725-08-04 -1726-04-02 -1726-07-06 -1726-07-06 -1726-07-21 -1726-10-31 -1727-06-18 -1727-07-14 -1727-07-23 -1727-07-26 -1727-08-22 -1728-02-15 -1728-03-07 -1728-09-19 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-07 -1728-11-10 -1728-12-17 -1729-04-09 -1729-04-30 -1729-05-09 -1729-06-23 -1729-08-14 -1729-08-29 -1729-11-07 -1729-12-13 -1730-02-03 -1730-02-12 -1730-04-16 -1730-05-14 -1730-08-27 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-02-05 -1731-04-14 -1731-06-28 -1731-08-09 -1731-08-09 -1731-08-09 -1731-08-09 -1731-10-07 -1731-10-31 -1732-01-21 -1732-01-27 -1732-02-08 -1732-02-19 -1732-02-23 -1732-03-05 -1732-04-27 -1732-06-26 -1732-07-11 -1732-07-22 -1732-08-31 -1732-12-01 -1733-06-23 -1733-09-04 -1733-09-08 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1733-11-15 -1734-02-22 -1734-03-02 -1734-03-20 -1734-06-16 -1734-07-03 
+1724-04-14 +1724-05-19 +1724-07-10 +1724-08-22 +1724-10-08 +1724-12-06 +1725-03-18 +1725-05-31 +1725-08-03 +1726-04-01 +1726-07-05 +1726-07-05 +1726-07-20 +1726-10-30 +1727-06-17 +1727-07-13 +1727-07-22 +1727-07-25 +1727-08-21 +1728-02-14 +1728-03-06 +1728-09-18 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-06 +1728-11-09 +1728-12-16 +1729-04-08 +1729-04-29 +1729-05-08 +1729-06-22 +1729-08-13 +1729-08-28 +1729-11-06 +1729-12-12 +1730-02-02 +1730-02-11 +1730-04-15 +1730-05-13 +1730-08-26 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-02-04 +1731-04-13 +1731-06-27 +1731-08-08 +1731-08-08 +1731-08-08 +1731-08-08 +1731-10-06 +1731-10-30 +1732-01-20 +1732-01-26 +1732-02-07 +1732-02-18 +1732-02-22 +1732-03-04 +1732-04-26 +1732-06-25 +1732-07-10 +1732-07-21 +1732-08-30 +1732-11-30 +1733-06-22 +1733-09-03 +1733-09-07 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1733-11-14 +1734-02-21 +1734-03-01 +1734-03-19 +1734-06-15 +1734-07-02 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 +1734-08-12 1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-13 -1734-08-14 -1734-10-24 -1734-12-10 -1735-01-31 -1735-02-11 -1735-02-15 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-07-10 -1735-09-04 -1735-09-16 -1735-09-28 -1735-11-29 -1735-12-04 -1735-12-12 -1736-04-13 -1736-04-28 -1736-06-24 -1736-09-28 -1736-11-14 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1736-11-21 -1737-02-23 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-03-02 -1737-05-15 -1737-06-28 -1737-06-30 -1737-07-05 -1737-07-17 +1734-10-23 +1734-12-09 +1735-01-30 +1735-02-10 +1735-02-14 +1735-07-09 +1735-07-09 +1735-07-09 +1735-07-09 +1735-07-09 +1735-07-09 +1735-07-09 +1735-09-03 +1735-09-15 +1735-09-27 +1735-11-28 +1735-12-03 +1735-12-11 +1736-04-12 +1736-04-27 +1736-06-23 +1736-09-27 +1736-11-13 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1736-11-20 +1737-02-22 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-03-01 +1737-05-14 +1737-06-27 +1737-06-29 +1737-07-04 +1737-07-16 +1737-08-01 1737-08-02 -1737-08-03 -1737-11-06 -1737-12-09 -1738-01-25 -1738-04-05 -1738-06-01 -1738-06-05 -1738-10-25 -1738-10-25 -1738-10-25 -1738-10-25 -1738-10-25 -1738-10-25 -1739-02-11 -1739-02-19 -1739-02-19 -1739-02-19 -1739-02-19 -1739-02-28 -1739-07-05 -1739-09-04 -1740-01-10 -1740-01-13 -1740-01-13 -1740-01-13 -1740-01-13 -1740-02-07 -1740-03-23 +1737-11-05 +1737-12-08 +1738-01-24 +1738-04-04 +1738-05-31 +1738-06-04 +1738-10-24 +1738-10-24 +1738-10-24 +1738-10-24 +1738-10-24 +1738-10-24 +1739-02-10 +1739-02-18 +1739-02-18 +1739-02-18 +1739-02-18 +1739-02-27 +1739-07-04 +1739-09-03 +1740-01-09 +1740-01-12 +1740-01-12 +1740-01-12 +1740-01-12 +1740-02-06 +1740-03-22 +1740-04-18 1740-04-19 -1740-04-20 -1740-07-13 -1740-11-24 -1740-11-28 -1741-04-14 -1741-06-02 -1741-08-16 -1741-08-27 -1741-09-11 -1741-11-26 -1741-11-26 -1741-12-31 -1742-06-07 -1742-12-09 -1742-12-18 -1742-12-26 -1743-01-11 -1743-01-16 -1743-01-16 -1743-01-16 -1743-01-20 -1743-02-03 -1743-02-10 -1743-12-14 -1744-01-03 -1744-04-14 -1744-09-14 -1744-09-19 -1744-09-24 -1744-12-05 -1744-12-05 
-1744-12-05 -1744-12-05 -1745-02-09 -1745-03-15 -1745-05-13 -1745-06-13 -1745-08-21 -1745-10-28 -1745-10-30 -1746-01-20 -1746-01-26 -1746-02-16 -1746-03-18 -1746-07-28 -1746-09-18 -1746-10-02 -1746-12-21 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-03-05 -1747-04-16 -1747-06-09 -1747-07-29 -1747-09-24 -1747-11-08 -1747-11-27 -1748-03-21 -1748-04-05 -1748-04-14 -1748-04-25 -1748-08-09 -1749-06-27 -1749-10-24 -1750-04-20 -1750-04-27 -1750-05-29 -1750-07-04 -1750-08-01 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-16 -1750-08-19 -1750-11-22 -1750-12-23 -1750-12-26 -1751-03-01 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-06-21 -1751-08-22 -1751-12-04 -1751-12-07 -1751-12-25 -1752-03-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-08 -1752-04-24 -1752-06-06 -1752-08-14 -1752-12-19 -1752-12-19 -1752-12-19 -1752-12-19 -1752-12-19 -1753-03-01 -1753-03-17 -1753-04-12 -1753-07-10 -1753-07-31 -1753-08-26 -1753-09-09 -1753-10-16 -1753-11-23 -1753-11-26 -1753-12-01 -1753-12-01 -1753-12-01 -1753-12-01 -1753-12-01 -1753-12-01 -1754-04-01 -1754-04-21 -1754-05-29 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-06-25 -1754-07-20 -1754-08-20 -1754-09-04 -1754-12-03 -1755-01-08 -1755-01-11 -1755-02-22 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-03-19 -1755-05-13 -1755-07-25 -1755-07-25 -1755-07-25 -1755-08-30 -1755-09-03 -1755-09-21 -1755-11-19 -1755-12-17 -1756-08-20 -1756-10-24 -1756-11-03 -1757-02-22 -1757-08-07 -1757-09-17 -1757-10-20 -1757-11-10 -1758-05-14 -1758-05-17 -1758-08-11 -1759-01-22 -1759-02-19 -1759-03-05 -1759-03-05 -1759-03-12 -1759-03-20 -1759-04-27 -1759-05-08 -1759-08-02 -1759-08-10 -1759-09-25 -1759-11-10 -1759-11-25 -1759-11-25 -1759-11-25 -1759-11-25 -1759-11-25 -1760-01-11 -1760-03-03 -1760-03-28 -1760-04-17 -1760-09-11 -1761-01-02 -1761-01-02 -1761-01-02 -1761-01-02 -1761-01-10 -1761-06-23 -1761-08-17 -1761-09-30 -1761-11-14 -1761-11-16 -1761-12-02 -1762-05-04 -1762-05-19 -1762-08-28 -1762-11-28 -1762-11-28 -1762-11-28 -1762-11-28 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-04-01 -1763-05-19 -1763-07-17 -1763-07-25 -1763-07-29 -1763-08-05 -1763-12-30 -1764-02-05 -1764-04-29 -1764-07-22 -1764-09-30 -1764-12-01 -1765-01-28 -1765-06-30 -1765-08-22 -1765-11-12 -1766-03-15 -1766-07-23 -1766-09-24 -1766-12-14 -1767-03-25 -1767-04-22 -1767-05-09 -1767-05-16 -1767-05-16 -1767-05-16 -1767-08-10 -1767-11-01 -1767-11-15 -1768-02-02 -1768-06-19 -1768-10-31 -1768-12-22 -1768-12-22 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-07 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-01-16 -1769-02-02 -1769-07-28 -1769-08-20 -1770-03-04 -1770-03-08 -1770-03-21 -1770-08-02 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-08-31 -1770-10-02 -1770-10-25 -1771-04-05 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-04-15 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-05-04 -1771-07-01 -1772-03-12 -1772-04-24 -1772-04-24 -1772-04-24 -1772-04-24 -1772-04-24 -1772-04-24 
-1772-05-27 -1772-09-13 -1772-09-15 -1772-12-24 -1772-12-30 -1773-04-08 -1773-06-07 -1773-11-16 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-07 -1774-05-19 -1774-06-15 -1774-07-06 -1774-08-01 -1774-11-02 -1775-04-15 -1775-06-04 -1775-06-16 -1775-07-16 +1740-07-12 +1740-11-23 +1740-11-27 +1741-04-13 +1741-06-01 +1741-08-15 +1741-08-26 +1741-09-10 +1741-11-25 +1741-11-25 +1741-12-30 +1742-06-06 +1742-12-08 +1742-12-17 +1742-12-25 +1743-01-10 +1743-01-15 +1743-01-15 +1743-01-15 +1743-01-19 +1743-02-02 +1743-02-09 +1743-12-13 +1744-01-02 +1744-04-13 +1744-09-13 +1744-09-18 +1744-09-23 +1744-12-04 +1744-12-04 +1744-12-04 +1744-12-04 +1745-02-08 +1745-03-14 +1745-05-12 +1745-06-12 +1745-08-20 +1745-10-27 +1745-10-29 +1746-01-19 +1746-01-25 +1746-02-15 +1746-03-17 +1746-07-27 +1746-09-17 +1746-10-01 +1746-12-20 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-03-04 +1747-04-15 +1747-06-08 +1747-07-28 +1747-09-23 +1747-11-07 +1747-11-26 +1748-03-20 +1748-04-04 +1748-04-13 +1748-04-24 +1748-08-08 +1749-06-26 +1749-10-23 +1750-04-19 +1750-04-26 +1750-05-28 +1750-07-03 +1750-07-31 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-15 +1750-08-18 +1750-11-21 +1750-12-22 +1750-12-25 +1751-02-28 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-06-20 +1751-08-21 +1751-12-03 +1751-12-06 +1751-12-24 +1752-03-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-07 +1752-04-23 +1752-06-05 +1752-08-13 +1752-12-18 +1752-12-18 +1752-12-18 +1752-12-18 +1752-12-18 +1753-02-28 +1753-03-16 +1753-04-11 +1753-07-09 +1753-07-30 +1753-08-25 +1753-09-08 +1753-10-15 +1753-11-22 +1753-11-25 +1753-11-30 +1753-11-30 +1753-11-30 +1753-11-30 +1753-11-30 +1753-11-30 +1754-03-31 +1754-04-20 +1754-05-28 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-06-24 +1754-07-19 +1754-08-19 +1754-09-03 +1754-12-02 +1755-01-07 +1755-01-10 +1755-02-21 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-03-18 +1755-05-12 +1755-07-24 +1755-07-24 +1755-07-24 +1755-08-29 +1755-09-02 +1755-09-20 +1755-11-18 +1755-12-16 +1756-08-19 +1756-10-23 +1756-11-02 +1757-02-21 +1757-08-06 +1757-09-16 +1757-10-19 +1757-11-09 +1758-05-13 +1758-05-16 +1758-08-10 +1759-01-21 +1759-02-18 +1759-03-04 +1759-03-04 +1759-03-11 +1759-03-19 +1759-04-26 +1759-05-07 +1759-08-01 +1759-08-09 +1759-09-24 +1759-11-09 +1759-11-24 +1759-11-24 +1759-11-24 +1759-11-24 +1759-11-24 +1760-01-10 +1760-03-02 +1760-03-27 +1760-04-16 +1760-09-10 +1761-01-01 +1761-01-01 +1761-01-01 +1761-01-01 +1761-01-09 +1761-06-22 +1761-08-16 +1761-09-29 +1761-11-13 +1761-11-15 +1761-12-01 +1762-05-03 +1762-05-18 +1762-08-27 +1762-11-27 +1762-11-27 +1762-11-27 +1762-11-27 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-03-31 +1763-05-18 +1763-07-16 +1763-07-24 +1763-07-28 +1763-08-04 +1763-12-29 +1764-02-04 +1764-04-28 +1764-07-21 +1764-09-29 +1764-11-30 +1765-01-27 +1765-06-29 +1765-08-21 +1765-11-11 +1766-03-14 +1766-07-22 +1766-09-23 +1766-12-13 +1767-03-24 +1767-04-21 +1767-05-08 +1767-05-15 +1767-05-15 +1767-05-15 +1767-08-09 +1767-10-31 +1767-11-14 +1768-02-01 +1768-06-18 +1768-10-30 +1768-12-21 +1768-12-21 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-06 +1769-01-06 
+1769-01-06 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-01-15 +1769-02-01 +1769-07-27 +1769-08-19 +1770-03-03 +1770-03-07 +1770-03-20 +1770-08-01 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-08-30 +1770-10-01 +1770-10-24 +1771-04-04 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-04-14 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-05-03 +1771-06-30 +1772-03-11 +1772-04-23 +1772-04-23 +1772-04-23 +1772-04-23 +1772-04-23 +1772-04-23 +1772-05-26 +1772-09-12 +1772-09-14 +1772-12-23 +1772-12-29 +1773-04-07 +1773-06-06 +1773-11-15 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-06 +1774-05-18 +1774-06-14 +1774-07-05 +1774-07-31 +1774-11-01 +1775-04-14 +1775-06-03 +1775-06-15 +1775-07-15 +1776-01-28 1776-01-29 -1776-01-30 -1776-09-18 -1776-09-26 -1776-10-09 -1776-10-15 -1776-12-06 -1777-01-28 -1777-01-30 -1777-01-30 -1777-01-30 -1777-01-30 -1777-01-30 -1777-01-30 -1777-04-04 -1777-04-16 -1777-05-26 -1777-06-05 -1778-01-09 -1778-04-25 -1779-01-11 -1779-04-02 -1779-04-10 -1779-04-29 -1779-04-29 -1779-04-29 -1779-04-29 -1779-04-29 -1779-08-02 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1779-10-07 -1780-02-01 -1780-12-11 -1781-02-13 -1781-08-19 -1781-10-10 -1781-11-20 -1782-02-08 -1782-05-17 -1782-06-06 -1782-06-09 -1782-06-20 -1782-07-04 -1782-10-04 -1782-10-10 -1782-10-10 -1782-10-10 -1782-10-10 -1782-10-10 -1782-11-04 -1783-01-15 -1783-05-14 -1783-07-16 -1783-07-16 -1783-07-24 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-13 -1784-02-29 -1784-04-12 -1784-05-09 -1785-01-06 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-02-13 -1785-05-22 -1785-06-09 -1785-06-19 -1785-08-29 -1785-09-30 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1785-10-07 -1786-03-19 -1786-06-08 -1786-08-06 -1786-08-29 -1786-09-23 -1786-09-29 -1786-10-06 -1787-01-28 -1787-02-23 -1787-04-24 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-13 -1787-05-31 -1787-05-31 -1787-05-31 -1787-05-31 -1787-05-31 -1787-05-31 -1787-06-28 -1787-07-07 -1787-08-02 -1787-10-06 -1787-10-19 -1787-10-24 -1787-11-11 -1787-12-19 +1776-09-17 +1776-09-25 +1776-10-08 +1776-10-14 +1776-12-05 +1777-01-27 +1777-01-29 +1777-01-29 +1777-01-29 +1777-01-29 +1777-01-29 +1777-01-29 +1777-04-03 +1777-04-15 +1777-05-25 +1777-06-04 +1778-01-08 +1778-04-24 +1779-01-10 +1779-04-01 +1779-04-09 +1779-04-28 +1779-04-28 +1779-04-28 +1779-04-28 +1779-04-28 +1779-08-01 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1779-10-06 +1780-01-31 +1780-12-10 +1781-02-12 +1781-08-18 +1781-10-09 +1781-11-19 +1782-02-07 +1782-05-16 +1782-06-05 +1782-06-08 +1782-06-19 +1782-07-03 +1782-10-03 +1782-10-09 +1782-10-09 +1782-10-09 +1782-10-09 +1782-10-09 +1782-11-03 +1783-01-14 +1783-05-13 +1783-07-15 +1783-07-15 +1783-07-23 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-12 +1784-02-28 +1784-04-11 +1784-05-08 +1785-01-05 +1785-02-12 +1785-02-12 +1785-02-12 
+1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-02-12 +1785-05-21 +1785-06-08 +1785-06-18 +1785-08-28 +1785-09-29 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1785-10-06 +1786-03-18 +1786-06-07 +1786-08-05 +1786-08-28 +1786-09-22 +1786-09-28 +1786-10-05 +1787-01-27 +1787-02-22 +1787-04-23 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-12 +1787-05-30 +1787-05-30 +1787-05-30 +1787-05-30 +1787-05-30 +1787-05-30 +1787-06-27 +1787-07-06 +1787-08-01 +1787-10-05 +1787-10-18 +1787-10-23 +1787-11-10 +1787-12-18 +1788-04-05 1788-04-06 -1788-04-07 -1788-07-04 -1788-08-06 -1789-01-22 -1789-01-22 -1789-01-22 -1789-02-08 -1789-04-18 -1789-05-10 -1789-08-10 -1790-01-30 -1790-04-19 -1790-10-10 -1791-01-24 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-03-16 -1791-07-20 -1791-10-04 -1792-06-16 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-02-26 -1793-08-20 -1793-08-22 -1793-11-19 -1794-03-19 -1794-03-19 -1794-03-19 -1794-03-19 -1794-03-19 -1794-04-16 -1794-04-20 -1794-05-15 -1794-07-03 -1794-08-13 -1794-09-02 -1794-09-24 -1794-10-16 -1794-11-02 -1794-11-14 -1795-01-17 -1795-03-09 -1795-05-27 -1795-05-27 -1795-05-27 -1795-05-27 -1795-05-27 +1788-07-03 +1788-08-05 +1789-01-21 +1789-01-21 +1789-01-21 +1789-02-07 +1789-04-17 +1789-05-09 +1789-08-09 +1790-01-29 +1790-04-18 +1790-10-09 +1791-01-23 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-03-15 +1791-07-19 +1791-10-03 +1792-06-15 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-02-25 +1793-08-19 +1793-08-21 +1793-11-18 +1794-03-18 +1794-03-18 +1794-03-18 +1794-03-18 +1794-03-18 +1794-04-15 +1794-04-19 +1794-05-14 +1794-07-02 +1794-08-12 +1794-09-01 +1794-09-23 +1794-10-15 +1794-11-01 +1794-11-13 +1795-01-16 +1795-03-08 +1795-05-26 +1795-05-26 +1795-05-26 +1795-05-26 +1795-05-26 +1797-01-03 1797-01-04 -1797-01-05 -1797-09-29 -1797-10-01 -1798-02-14 -1798-09-28 -1799-04-29 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-08-19 -1799-12-02 -1800-03-30 -1800-04-05 -1800-04-23 -1800-09-04 -1800-10-01 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1800-11-14 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-03-13 -1801-11-25 -1801-12-15 -1802-03-30 -1802-04-11 -1802-06-01 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-07-08 -1802-08-13 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-23 -1802-08-30 -1802-11-26 -1802-12-13 -1803-02-04 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-11 -1803-06-14 -1803-07-11 -1803-12-02 -1803-12-08 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-01 -1804-03-04 -1804-03-08 -1804-07-16 -1804-10-23 -1804-12-28 -1805-01-27 -1805-03-19 -1805-07-15 -1805-07-20 -1805-10-23 -1805-10-23 -1805-10-23 -1805-10-23 -1805-10-23 -1806-01-02 -1806-02-10 -1806-10-12 -1807-02-18 -1807-02-23 -1807-03-09 -1807-06-15 -1807-07-09 
-1807-09-02 -1807-10-25 -1807-10-29 -1807-12-29 -1808-03-03 -1808-03-13 -1808-05-10 -1808-07-02 -1808-09-10 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-03-27 -1809-04-23 -1809-06-06 -1809-09-15 -1809-09-18 -1809-12-24 -1810-08-17 -1810-08-17 -1811-01-27 -1811-01-27 -1811-01-27 -1811-01-27 -1811-04-10 -1811-04-27 -1811-05-31 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-06-25 -1811-08-14 -1812-01-17 -1812-05-25 -1812-06-11 -1812-10-16 -1812-12-24 -1813-02-04 -1813-03-18 -1813-04-11 -1813-07-09 -1813-08-20 -1813-10-20 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-01-30 -1814-04-26 -1814-05-28 -1814-11-09 -1814-11-20 -1814-12-21 -1815-01-16 -1815-02-23 -1815-03-10 -1815-04-30 -1815-07-30 -1816-01-13 -1816-02-13 -1816-03-13 -1816-08-03 -1816-08-12 -1816-12-25 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-10 -1817-04-17 -1817-05-15 -1817-05-20 -1817-06-02 -1817-07-02 -1817-07-12 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 -1817-08-14 +1797-09-28 +1797-09-30 +1798-02-13 +1798-09-27 +1799-04-28 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-08-18 +1799-12-01 +1800-03-29 +1800-04-04 +1800-04-22 +1800-09-03 +1800-09-30 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1800-11-13 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-03-12 +1801-11-24 +1801-12-14 +1802-03-29 +1802-04-10 +1802-05-31 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-07-07 +1802-08-12 +1802-08-22 +1802-08-22 +1802-08-22 +1802-08-22 +1802-08-22 +1802-08-22 +1802-08-29 +1802-11-25 +1802-12-12 +1803-02-03 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-10 +1803-06-13 +1803-07-10 +1803-12-01 +1803-12-07 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-02-29 +1804-03-03 +1804-03-07 +1804-07-15 +1804-10-22 +1804-12-27 +1805-01-26 +1805-03-18 +1805-07-14 +1805-07-19 +1805-10-22 +1805-10-22 +1805-10-22 +1805-10-22 +1805-10-22 +1806-01-01 +1806-02-09 +1806-10-11 +1807-02-17 +1807-02-22 +1807-03-08 +1807-06-14 +1807-07-08 +1807-09-01 +1807-10-24 +1807-10-28 +1807-12-28 +1808-03-02 +1808-03-12 +1808-05-09 +1808-07-01 +1808-09-09 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-03-26 +1809-04-22 +1809-06-05 +1809-09-14 +1809-09-17 +1809-12-23 +1810-08-16 +1810-08-16 +1811-01-26 +1811-01-26 +1811-01-26 +1811-01-26 +1811-04-09 +1811-04-26 +1811-05-30 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-06-24 +1811-08-13 +1812-01-16 +1812-05-24 +1812-06-10 +1812-10-15 +1812-12-23 +1813-02-03 +1813-03-17 +1813-04-10 +1813-07-08 +1813-08-19 +1813-10-19 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-01-29 +1814-04-25 +1814-05-27 +1814-11-08 +1814-11-19 +1814-12-20 +1815-01-15 +1815-02-22 +1815-03-09 +1815-04-29 +1815-07-29 +1816-01-12 +1816-02-12 +1816-03-12 +1816-08-02 +1816-08-11 +1816-12-24 +1817-04-09 
+1817-04-09 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-09 +1817-04-16 +1817-05-14 +1817-05-19 +1817-06-01 +1817-07-01 +1817-07-11 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-08-13 +1817-10-14 1817-10-15 -1817-10-16 -1817-12-18 -1818-01-01 -1818-02-25 -1818-05-26 -1818-06-02 -1818-07-17 -1818-08-13 -1818-09-21 -1818-09-23 -1818-09-25 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-04-07 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-02 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-09 -1819-07-22 -1819-11-19 -1820-04-09 -1820-04-11 -1820-06-04 -1820-07-14 -1820-08-05 -1820-10-01 -1820-10-03 -1820-11-01 -1820-11-29 -1820-11-29 -1820-11-29 -1820-11-29 -1820-11-29 -1820-11-29 -1821-01-07 -1821-01-12 -1821-05-16 -1821-07-26 -1821-08-14 -1821-10-11 -1821-11-10 -1821-12-08 -1822-01-22 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-06 -1822-07-16 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-07-24 -1822-08-19 -1822-11-23 -1822-12-03 -1823-01-31 -1823-03-15 -1823-03-18 +1817-12-17 +1817-12-31 +1818-02-24 +1818-05-25 +1818-06-01 +1818-07-16 +1818-08-12 +1818-09-20 +1818-09-22 +1818-09-24 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-04-06 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-01 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-08 +1819-07-21 +1819-11-18 +1820-04-08 +1820-04-10 +1820-06-03 +1820-07-13 +1820-08-04 +1820-09-30 +1820-10-02 +1820-10-31 +1820-11-28 +1820-11-28 +1820-11-28 +1820-11-28 +1820-11-28 +1820-11-28 +1821-01-06 +1821-01-11 +1821-05-15 +1821-07-25 +1821-08-13 +1821-10-10 +1821-11-09 +1821-12-07 +1822-01-21 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-05 +1822-07-15 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-07-23 +1822-08-18 +1822-11-22 +1822-12-02 +1823-01-30 +1823-03-14 +1823-03-17 +1823-05-12 1823-05-13 -1823-05-14 -1823-06-03 -1823-08-06 -1823-10-08 -1824-01-08 -1824-01-21 -1824-02-08 -1824-06-29 -1824-07-10 -1824-08-13 -1824-08-25 -1824-09-16 -1825-02-06 -1825-02-19 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-04-24 -1825-06-18 -1825-08-24 -1825-08-28 -1825-11-06 -1825-12-27 -1826-01-16 -1826-07-25 -1826-11-10 -1826-11-29 -1827-05-13 -1827-06-12 -1827-06-22 -1827-07-05 -1827-07-23 -1827-07-23 -1827-07-23 -1827-08-07 -1827-12-13 -1828-02-14 -1828-06-20 -1828-10-14 -1829-02-17 -1829-07-24 -1829-11-09 -1830-05-27 -1830-11-19 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1830-12-09 -1831-01-29 -1831-03-11 -1831-05-26 -1831-07-23 -1831-08-18 -1831-08-21 -1831-09-16 -1831-10-17 -1831-12-18 -1832-11-12 -1833-04-26 -1833-05-08 -1833-05-08 -1833-05-08 -1833-05-08 -1833-05-08 -1833-07-05 -1833-10-07 -1833-11-29 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-12 -1833-12-21 -1834-02-18 -1834-04-22 -1834-05-14 -1834-05-14 -1834-05-14 -1834-05-14 -1834-05-14 -1834-05-14 -1834-10-21 -1834-11-05 -1835-01-03 -1835-06-18 -1835-07-24 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 -1835-08-06 
-1835-08-06 -1835-09-05 -1835-12-03 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-01-06 -1836-05-21 -1836-05-27 -1836-09-08 -1836-10-14 -1837-01-18 -1837-03-01 -1837-03-30 -1837-07-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-08-29 -1837-10-17 -1837-12-16 -1838-01-22 -1838-08-04 -1838-12-02 -1839-01-07 -1839-02-02 -1839-04-23 -1839-04-23 -1839-04-23 -1839-04-23 -1839-04-23 -1839-05-14 -1839-05-14 -1839-05-14 -1839-05-14 -1839-05-14 -1839-05-14 -1839-07-28 -1840-01-18 -1840-01-23 -1840-02-03 -1840-02-24 -1840-12-13 -1840-12-19 -1841-04-18 -1841-05-17 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-05-22 -1841-06-04 -1841-12-05 -1842-04-04 -1842-05-18 -1842-05-18 -1842-05-18 -1842-05-18 -1842-05-18 -1842-05-18 -1842-06-03 -1842-12-29 -1843-03-14 -1843-07-08 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-08-20 -1843-10-10 -1843-12-17 -1844-03-26 -1844-07-20 -1844-07-23 -1844-08-22 -1844-09-20 -1844-11-03 -1845-05-08 -1845-05-13 -1845-09-06 -1846-01-05 -1846-02-09 -1846-08-14 -1846-09-19 -1846-12-05 -1847-01-09 +1823-06-02 +1823-08-05 +1823-10-07 +1824-01-07 +1824-01-20 +1824-02-07 +1824-06-28 +1824-07-09 +1824-08-12 +1824-08-24 +1824-09-15 +1825-02-05 +1825-02-18 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-04-23 +1825-06-17 +1825-08-23 +1825-08-27 +1825-11-05 +1825-12-26 +1826-01-15 +1826-07-24 +1826-11-09 +1826-11-28 +1827-05-12 +1827-06-11 +1827-06-21 +1827-07-04 +1827-07-22 +1827-07-22 +1827-07-22 +1827-08-06 +1827-12-12 +1828-02-13 +1828-06-19 +1828-10-13 +1829-02-16 +1829-07-23 +1829-11-08 +1830-05-26 +1830-11-18 +1830-12-08 +1830-12-08 +1830-12-08 +1830-12-08 +1830-12-08 +1830-12-08 +1830-12-08 +1831-01-28 +1831-03-10 +1831-05-25 +1831-07-22 +1831-08-17 +1831-08-20 +1831-09-15 +1831-10-16 +1831-12-17 +1832-11-11 +1833-04-25 +1833-05-07 +1833-05-07 +1833-05-07 +1833-05-07 +1833-05-07 +1833-07-04 +1833-10-06 +1833-11-28 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-11 +1833-12-20 +1834-02-17 +1834-04-21 +1834-05-13 +1834-05-13 +1834-05-13 +1834-05-13 +1834-05-13 +1834-05-13 +1834-10-20 +1834-11-04 +1835-01-02 +1835-06-17 +1835-07-23 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-08-05 +1835-09-04 +1835-12-02 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-01-05 +1836-05-20 +1836-05-26 +1836-09-07 +1836-10-13 +1837-01-17 +1837-02-28 +1837-03-29 +1837-07-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-08-28 +1837-10-16 +1837-12-15 +1838-01-21 +1838-08-03 +1838-12-01 +1839-01-06 +1839-02-01 +1839-04-22 +1839-04-22 +1839-04-22 +1839-04-22 +1839-04-22 +1839-05-13 +1839-05-13 +1839-05-13 +1839-05-13 +1839-05-13 +1839-05-13 +1839-07-27 +1840-01-17 +1840-01-22 +1840-02-02 +1840-02-23 +1840-12-12 +1840-12-18 +1841-04-17 +1841-05-16 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-05-21 +1841-06-03 +1841-12-04 +1842-04-03 +1842-05-17 +1842-05-17 +1842-05-17 +1842-05-17 +1842-05-17 +1842-05-17 +1842-06-02 +1842-12-28 +1843-03-13 +1843-07-07 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 
+1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-08-19 +1843-10-09 +1843-12-16 +1844-03-25 +1844-07-19 +1844-07-22 +1844-08-21 +1844-09-19 +1844-11-02 +1845-05-07 +1845-05-12 +1845-09-05 +1846-01-04 +1846-02-08 +1846-08-13 +1846-09-18 +1846-12-04 +1847-01-08 +1847-02-24 1847-02-25 -1847-02-26 -1847-04-05 -1847-04-22 -1847-07-26 -1847-08-23 -1848-03-01 -1849-03-08 -1849-03-31 -1849-04-16 -1849-05-22 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-06-05 -1849-08-28 -1849-09-11 -1850-01-21 -1850-03-19 -1850-04-08 -1850-08-30 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-10-21 -1850-12-31 -1851-02-12 -1851-02-12 -1851-02-12 -1851-02-12 -1851-02-12 -1851-03-15 -1851-06-04 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-03 -1851-09-27 -1851-10-08 -1851-11-11 -1852-02-10 -1852-02-20 -1852-04-13 -1852-04-24 -1852-06-15 -1852-09-02 -1852-09-12 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-05 -1852-10-31 -1853-01-26 -1853-07-26 -1853-09-16 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-10-24 -1853-11-21 -1853-12-13 -1853-12-30 -1854-01-31 -1854-02-28 -1854-05-03 -1854-05-30 -1854-05-30 -1854-05-30 -1854-05-30 -1854-05-30 -1854-07-17 -1854-12-22 -1854-12-29 -1855-02-24 -1855-10-31 -1855-11-07 -1855-11-30 -1855-12-24 -1856-01-13 -1856-05-07 -1856-05-20 -1856-05-22 -1856-06-26 -1856-07-12 -1856-10-06 -1856-11-16 -1857-04-14 -1857-05-23 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-06-19 -1857-07-14 -1857-08-14 -1857-10-19 -1858-02-15 -1858-02-24 -1858-07-04 -1858-07-15 -1858-10-25 -1858-10-28 -1859-01-18 -1859-03-08 -1859-03-20 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-03-26 -1859-04-10 -1859-05-23 -1859-08-31 -1859-09-17 -1859-09-17 -1859-09-17 -1859-11-21 -1859-12-31 -1860-03-10 -1860-03-12 -1860-05-15 -1860-08-22 -1860-09-19 -1860-12-03 -1861-04-23 -1861-08-14 -1861-12-06 -1861-12-19 -1862-01-12 -1862-03-01 -1862-03-20 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-03-30 -1862-06-26 -1863-02-22 -1863-06-17 -1863-09-12 -1863-12-27 -1863-12-29 -1864-02-24 -1864-07-19 -1864-08-12 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-07-03 -1865-12-04 -1865-12-17 -1866-01-04 -1866-01-10 -1866-03-29 -1866-04-07 -1866-04-14 -1866-09-03 -1866-10-21 -1867-05-11 -1867-06-21 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-08-29 -1867-09-03 -1867-09-26 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-01 -1867-10-27 -1867-11-10 -1867-11-12 -1867-11-12 -1867-11-12 -1867-11-12 -1867-11-12 -1867-11-25 -1869-01-21 -1869-01-27 -1869-02-18 -1869-03-09 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-07-05 -1869-12-03 -1870-01-17 -1870-07-27 -1870-08-21 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-02 -1870-09-21 -1870-10-08 -1870-11-19 -1871-01-22 -1871-01-27 -1871-02-14 
-1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-02-27 -1871-03-31 -1871-07-29 -1871-08-01 -1871-08-03 -1871-08-17 -1872-02-16 -1872-05-28 -1872-06-06 -1872-07-26 -1872-10-09 -1873-02-27 -1873-05-29 -1873-07-02 -1873-07-12 -1873-07-15 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-17 -1873-07-27 -1873-07-30 -1873-09-06 -1873-09-06 -1873-09-06 -1873-09-06 -1873-12-23 -1874-01-02 -1874-04-04 -1874-04-14 -1874-07-09 -1874-07-21 -1874-09-24 -1874-10-24 -1874-11-26 -1874-12-16 -1875-03-28 -1875-04-24 -1875-05-12 -1875-07-18 -1875-08-16 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-10-15 -1875-11-02 -1875-11-27 -1876-08-16 -1876-10-18 -1876-12-15 -1876-12-28 -1877-03-02 -1877-03-06 -1877-03-19 -1877-06-22 -1877-07-17 -1877-09-01 -1877-10-04 -1878-01-07 -1878-02-02 -1878-04-11 -1878-04-30 -1878-06-26 -1878-10-17 -1878-10-21 -1878-11-17 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-02-05 -1879-03-10 -1879-07-23 -1879-09-16 -1879-12-02 -1880-05-30 +1847-04-04 +1847-04-21 +1847-07-25 +1847-08-22 +1848-02-29 +1849-03-07 +1849-03-30 +1849-04-15 +1849-05-21 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-06-04 +1849-08-27 +1849-09-10 +1850-01-20 +1850-03-18 +1850-04-07 +1850-08-29 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-10-20 +1850-12-30 +1851-02-11 +1851-02-11 +1851-02-11 +1851-02-11 +1851-02-11 +1851-03-14 +1851-06-03 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-02 +1851-09-26 +1851-10-07 +1851-11-10 +1852-02-09 +1852-02-19 +1852-04-12 +1852-04-23 +1852-06-14 +1852-09-01 +1852-09-11 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-04 +1852-10-30 +1853-01-25 +1853-07-25 +1853-09-15 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-10-23 +1853-11-20 +1853-12-12 +1853-12-29 +1854-01-30 +1854-02-27 +1854-05-02 +1854-05-29 +1854-05-29 +1854-05-29 +1854-05-29 +1854-05-29 +1854-07-16 +1854-12-21 +1854-12-28 +1855-02-23 +1855-10-30 +1855-11-06 +1855-11-29 +1855-12-23 +1856-01-12 +1856-05-06 +1856-05-19 +1856-05-21 +1856-06-25 +1856-07-11 +1856-10-05 +1856-11-15 +1857-04-13 +1857-05-22 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-06-18 +1857-07-13 +1857-08-13 +1857-10-18 +1858-02-14 +1858-02-23 +1858-07-03 +1858-07-14 +1858-10-24 +1858-10-27 +1859-01-17 +1859-03-07 +1859-03-19 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-03-25 +1859-04-09 +1859-05-22 +1859-08-30 +1859-09-16 +1859-09-16 +1859-09-16 +1859-11-20 +1859-12-30 +1860-03-09 +1860-03-11 +1860-05-14 +1860-08-21 +1860-09-18 +1860-12-02 +1861-04-22 +1861-08-13 +1861-12-05 +1861-12-18 +1862-01-11 +1862-02-28 +1862-03-19 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-03-29 +1862-06-25 +1863-02-21 +1863-06-16 +1863-09-11 +1863-12-26 +1863-12-28 +1864-02-23 +1864-07-18 +1864-08-11 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 
+1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-07-02 +1865-12-03 +1865-12-16 +1866-01-03 +1866-01-09 +1866-03-28 +1866-04-06 +1866-04-13 +1866-09-02 +1866-10-20 +1867-05-10 +1867-06-20 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-08-28 +1867-09-02 +1867-09-25 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-09-30 +1867-10-26 +1867-11-09 +1867-11-11 +1867-11-11 +1867-11-11 +1867-11-11 +1867-11-11 +1867-11-24 +1869-01-20 +1869-01-26 +1869-02-17 +1869-03-08 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-07-04 +1869-12-02 +1870-01-16 +1870-07-26 +1870-08-20 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-01 +1870-09-20 +1870-10-07 +1870-11-18 +1871-01-21 +1871-01-26 +1871-02-13 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-02-26 +1871-03-30 +1871-07-28 +1871-07-31 +1871-08-02 +1871-08-16 +1872-02-15 +1872-05-27 +1872-06-05 +1872-07-25 +1872-10-08 +1873-02-26 +1873-05-28 +1873-07-01 +1873-07-11 +1873-07-14 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-16 +1873-07-26 +1873-07-29 +1873-09-05 +1873-09-05 +1873-09-05 +1873-09-05 +1873-12-22 +1874-01-01 +1874-04-03 +1874-04-13 +1874-07-08 +1874-07-20 +1874-09-23 +1874-10-23 +1874-11-25 +1874-12-15 +1875-03-27 +1875-04-23 +1875-05-11 +1875-07-17 +1875-08-15 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-10-14 +1875-11-01 +1875-11-26 +1876-08-15 +1876-10-17 +1876-12-14 +1876-12-27 +1877-03-01 +1877-03-05 +1877-03-18 +1877-06-21 +1877-07-16 +1877-08-31 +1877-10-03 +1878-01-06 +1878-02-01 +1878-04-10 +1878-04-29 +1878-06-25 +1878-10-16 +1878-10-20 +1878-11-16 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-02-04 +1879-03-09 +1879-07-22 +1879-09-15 +1879-12-01 +1880-05-29 +1880-06-29 1880-06-30 -1880-07-01 -1880-11-03 -1880-11-30 -1880-12-07 -1881-01-23 -1881-01-27 -1881-05-03 -1881-05-15 -1881-06-23 -1881-07-12 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-07-31 -1881-09-13 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-09 -1882-02-14 -1882-05-01 -1882-05-25 -1882-07-22 -1882-11-09 -1883-04-11 -1883-05-26 -1883-06-01 -1883-10-14 -1883-10-20 -1883-10-29 -1883-10-29 -1883-10-29 -1883-10-29 -1883-10-29 -1884-08-05 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-08-11 -1884-09-23 -1884-11-05 -1884-11-20 -1884-12-15 -1885-01-03 -1885-01-22 -1885-02-20 -1885-05-25 -1885-06-21 -1885-08-08 -1886-03-21 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-05 -1886-04-07 -1886-04-25 -1886-06-01 -1886-07-25 -1886-11-10 -1886-12-02 -1887-01-16 -1887-06-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-07-11 -1887-10-26 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-11-27 -1887-12-08 -1888-01-15 -1888-02-11 -1888-08-08 -1888-11-03 -1888-11-15 -1889-03-10 -1889-06-06 -1889-06-13 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 -1889-08-14 
-1889-08-14 -1889-09-20 -1890-02-24 -1890-04-29 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-07-15 -1890-09-15 +1880-11-02 +1880-11-29 +1880-12-06 +1881-01-22 +1881-01-26 +1881-05-02 +1881-05-14 +1881-06-22 +1881-07-11 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-07-30 +1881-09-12 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-08 +1882-02-13 +1882-04-30 +1882-05-24 +1882-07-21 +1882-11-08 +1883-04-10 +1883-05-25 +1883-05-31 +1883-10-13 +1883-10-19 +1883-10-28 +1883-10-28 +1883-10-28 +1883-10-28 +1883-10-28 +1884-08-04 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-08-10 +1884-09-22 +1884-11-04 +1884-11-19 +1884-12-14 +1885-01-02 +1885-01-21 +1885-02-19 +1885-05-24 +1885-06-20 +1885-08-07 +1886-03-20 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-04 +1886-04-06 +1886-04-24 +1886-05-31 +1886-07-24 +1886-11-09 +1886-12-01 +1887-01-15 +1887-06-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-07-10 +1887-10-25 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-11-26 +1887-12-07 +1888-01-14 +1888-02-10 +1888-08-07 +1888-11-02 +1888-11-14 +1889-03-09 +1889-06-05 +1889-06-12 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-08-13 +1889-09-19 +1890-02-23 +1890-04-28 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-07-14 +1890-09-14 +1890-10-04 1890-10-05 -1890-10-06 -1890-11-10 -1891-01-10 -1891-01-13 -1891-06-20 -1891-07-13 -1892-03-26 -1892-05-13 -1892-09-10 -1893-01-18 -1893-07-10 +1890-11-09 +1891-01-09 +1891-01-12 +1891-06-19 +1891-07-12 +1892-03-25 +1892-05-12 +1892-09-09 +1893-01-17 +1893-07-09 +1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 +1893-07-18 1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-19 -1893-07-20 -1894-06-05 -1894-06-18 -1894-10-01 -1894-10-10 -1894-11-28 -1895-01-20 -1895-02-07 -1895-09-04 -1895-11-01 -1895-12-13 -1895-12-31 -1896-01-02 -1896-01-13 -1896-01-26 -1896-02-26 -1896-03-09 -1896-05-05 -1896-05-10 -1896-08-08 -1896-08-14 -1896-08-25 -1897-02-21 -1897-06-09 -1897-06-12 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-08-28 -1897-11-03 -1897-12-01 -1898-02-23 -1898-02-27 -1898-03-06 -1898-04-13 -1898-05-22 -1898-06-21 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1898-12-18 -1899-01-30 -1899-02-16 -1899-03-01 -1899-06-24 -1899-08-27 -1899-10-20 -1900-04-27 -1900-09-17 -1900-10-19 -1901-02-10 -1901-04-13 -1901-05-12 -1901-05-12 -1901-05-12 -1901-05-12 -1901-05-29 -1901-10-05 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1901-12-23 -1902-01-24 -1902-05-14 -1902-06-14 -1902-07-29 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-04 -1903-01-17 -1903-03-20 -1903-04-23 -1903-05-12 -1904-05-24 -1904-06-30 -1904-08-20 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 -1904-08-28 
-1904-10-14 -1904-11-30 -1905-03-27 -1905-06-07 -1905-06-15 -1905-07-20 -1905-09-05 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-12 -1905-09-23 -1905-12-15 -1906-01-11 -1906-07-25 -1906-08-27 -1906-09-02 -1906-11-02 -1906-12-13 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1907-05-24 -1908-01-24 -1908-03-28 -1908-05-03 -1908-05-28 -1908-06-27 -1908-06-29 -1908-12-19 -1909-11-21 -1910-01-23 -1910-02-16 -1910-03-05 -1910-03-15 -1910-04-06 -1910-05-12 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-05-28 -1910-06-18 -1910-08-17 -1910-11-06 -1911-05-05 -1911-06-22 -1911-11-19 -1912-04-14 -1912-05-02 -1912-06-12 -1913-01-30 -1913-02-12 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-06-01 -1913-07-14 -1913-09-26 -1913-09-26 -1913-09-26 -1913-10-24 -1913-12-15 -1914-02-18 -1914-08-19 -1915-02-05 -1915-03-05 -1915-08-10 -1915-08-15 -1915-11-05 -1915-12-12 -1915-12-18 -1916-05-05 -1916-05-12 -1916-06-07 -1916-06-11 +1894-06-04 +1894-06-17 +1894-09-30 +1894-10-09 +1894-11-27 +1895-01-19 +1895-02-06 +1895-09-03 +1895-10-31 +1895-12-12 +1895-12-30 +1896-01-01 +1896-01-12 +1896-01-25 +1896-02-25 +1896-03-08 +1896-05-04 +1896-05-09 +1896-08-07 +1896-08-13 +1896-08-24 +1897-02-20 +1897-06-08 +1897-06-11 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-08-27 +1897-11-02 +1897-11-30 +1898-02-22 +1898-02-26 +1898-03-05 +1898-04-12 +1898-05-21 +1898-06-20 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1898-12-17 +1899-01-29 +1899-02-15 +1899-02-28 +1899-06-23 +1899-08-26 +1899-10-19 +1900-04-26 +1900-09-16 +1900-10-18 +1901-02-09 +1901-04-12 +1901-05-11 +1901-05-11 +1901-05-11 +1901-05-11 +1901-05-28 +1901-10-04 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1901-12-22 +1902-01-23 +1902-05-13 +1902-06-13 +1902-07-28 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-03 +1903-01-16 +1903-03-19 +1903-04-22 +1903-05-11 +1904-05-23 +1904-06-29 +1904-08-19 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-08-27 +1904-10-13 +1904-11-29 +1905-03-26 +1905-06-06 +1905-06-14 +1905-07-19 +1905-09-04 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-11 +1905-09-22 +1905-12-14 +1906-01-10 +1906-07-24 +1906-08-26 +1906-09-01 +1906-11-01 +1906-12-12 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1907-05-23 +1908-01-23 +1908-03-27 +1908-05-02 +1908-05-27 +1908-06-26 +1908-06-28 +1908-12-18 +1909-11-20 +1910-01-22 +1910-02-15 +1910-03-04 +1910-03-14 +1910-04-05 +1910-05-11 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-05-27 +1910-06-17 +1910-08-16 +1910-11-05 +1911-05-04 +1911-06-21 +1911-11-18 +1912-04-13 +1912-05-01 +1912-06-11 +1913-01-29 +1913-02-11 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-05-31 +1913-07-13 +1913-09-25 +1913-09-25 +1913-09-25 +1913-10-23 +1913-12-14 +1914-02-17 +1914-08-18 +1915-02-04 +1915-03-04 +1915-08-09 +1915-08-14 +1915-11-04 +1915-12-11 +1915-12-17 +1916-05-04 +1916-05-11 +1916-06-06 +1916-06-10 +1916-08-08 1916-08-09 -1916-08-10 -1917-04-16 -1917-06-28 
-1917-12-07 -1918-02-10 -1918-02-10 -1918-02-10 -1918-02-10 -1918-02-10 -1918-02-10 -1918-09-11 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1918-10-31 -1919-02-07 -1919-04-07 -1919-06-26 -1919-08-22 -1919-10-04 -1919-10-04 -1919-10-21 -1920-01-05 -1920-05-06 -1920-06-30 -1920-08-04 -1920-08-18 -1920-10-19 -1921-02-19 -1921-03-14 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-05-28 -1921-06-02 -1921-07-03 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-10-10 -1921-11-04 -1921-11-04 -1921-11-04 -1921-11-04 -1921-11-04 -1921-11-04 -1922-01-15 -1922-04-20 -1922-06-22 -1922-07-22 -1923-01-24 -1923-03-08 -1923-03-24 -1923-05-29 -1923-08-12 -1923-08-31 -1923-09-20 -1923-11-15 -1923-12-16 -1924-01-26 -1924-03-16 -1924-05-06 -1924-06-23 -1924-07-04 -1924-11-22 -1924-12-10 -1925-02-17 -1925-06-05 -1925-09-05 -1925-09-08 -1925-10-23 -1925-12-30 -1926-03-30 -1926-04-10 -1926-05-27 -1926-09-07 -1926-12-07 -1927-02-11 -1927-03-27 -1927-04-05 -1927-04-05 -1927-04-05 -1927-04-05 -1927-04-05 -1927-04-05 -1927-05-16 -1927-07-16 -1927-07-26 -1927-08-26 -1927-09-03 -1927-11-01 -1927-11-28 -1928-01-10 -1928-02-25 -1928-05-11 -1928-07-29 -1928-08-27 -1929-03-07 -1929-04-01 -1929-04-05 -1929-05-29 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1929-10-23 -1930-02-01 -1930-04-09 -1930-04-09 -1930-04-09 -1930-04-09 -1930-06-11 -1930-07-07 -1930-09-28 -1930-12-21 -1931-04-08 -1931-07-01 -1931-08-30 -1931-10-31 -1931-10-31 -1931-12-12 -1932-03-15 -1932-03-30 -1932-04-21 -1932-05-31 -1932-10-27 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-04-13 -1933-08-27 -1933-08-30 -1933-11-30 -1933-12-22 -1934-06-02 -1934-08-13 -1934-09-12 -1934-11-10 -1934-11-10 -1934-11-10 -1934-11-10 -1934-11-10 -1934-11-10 -1935-04-11 -1936-01-31 -1936-06-23 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-07-04 -1936-11-04 -1937-01-22 -1937-02-04 +1917-04-15 +1917-06-27 +1917-12-06 +1918-02-09 +1918-02-09 +1918-02-09 +1918-02-09 +1918-02-09 +1918-02-09 +1918-09-10 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1918-10-30 +1919-02-06 +1919-04-06 +1919-06-25 +1919-08-21 +1919-10-03 +1919-10-03 +1919-10-20 +1920-01-04 +1920-05-05 +1920-06-29 +1920-08-03 +1920-08-17 +1920-10-18 +1921-02-18 +1921-03-13 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-05-27 +1921-06-01 +1921-07-02 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-10-09 +1921-11-03 +1921-11-03 +1921-11-03 +1921-11-03 +1921-11-03 +1921-11-03 +1922-01-14 +1922-04-19 +1922-06-21 +1922-07-21 +1923-01-23 +1923-03-07 +1923-03-23 +1923-05-28 +1923-08-11 +1923-08-30 +1923-09-19 +1923-11-14 +1923-12-15 +1924-01-25 +1924-03-15 +1924-05-05 +1924-06-22 +1924-07-03 +1924-11-21 +1924-12-09 +1925-02-16 +1925-06-04 +1925-09-04 +1925-09-07 +1925-10-22 +1925-12-29 +1926-03-29 +1926-04-09 +1926-05-26 +1926-09-06 +1926-12-06 +1927-02-10 +1927-03-26 +1927-04-04 +1927-04-04 +1927-04-04 +1927-04-04 +1927-04-04 +1927-04-04 +1927-05-15 +1927-07-15 +1927-07-25 +1927-08-25 +1927-09-02 +1927-10-31 +1927-11-27 +1928-01-09 +1928-02-24 +1928-05-10 
+1928-07-28 +1928-08-26 +1929-03-06 +1929-03-31 +1929-04-04 +1929-05-28 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1929-10-22 +1930-01-31 +1930-04-08 +1930-04-08 +1930-04-08 +1930-04-08 +1930-06-10 +1930-07-06 +1930-09-27 +1930-12-20 +1931-04-07 +1931-06-30 +1931-08-29 +1931-10-30 +1931-10-30 +1931-12-11 +1932-03-14 +1932-03-29 +1932-04-20 +1932-05-30 +1932-10-26 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-04-12 +1933-08-26 +1933-08-29 +1933-11-29 +1933-12-21 +1934-06-01 +1934-08-12 +1934-09-11 +1934-11-09 +1934-11-09 +1934-11-09 +1934-11-09 +1934-11-09 +1934-11-09 +1935-04-10 +1936-01-30 +1936-06-22 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-07-03 +1936-11-03 +1937-01-21 +1937-02-03 +1937-05-29 1937-05-30 -1937-05-31 -1937-06-26 -1937-09-08 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-06 -1937-11-17 -1938-01-27 +1937-06-25 +1937-09-07 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-05 +1937-11-16 +1938-01-26 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 +1938-02-24 1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-25 -1938-02-26 -1938-05-17 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-09-12 -1938-10-08 -1939-01-24 -1939-02-19 -1939-03-23 -1939-05-05 -1939-07-13 -1939-08-04 +1938-05-16 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-09-11 +1938-10-07 +1939-01-23 +1939-02-18 +1939-03-22 +1939-05-04 +1939-07-12 +1939-08-03 +1940-02-09 1940-02-10 -1940-02-11 -1940-03-27 -1940-04-28 -1940-05-01 -1940-08-07 -1940-08-18 -1940-09-24 -1941-03-24 -1941-04-19 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-09-16 -1941-11-07 -1942-02-16 -1942-03-23 -1943-01-31 -1943-03-30 -1943-05-08 -1943-11-28 -1944-02-28 -1944-05-06 -1945-09-26 -1945-10-08 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1945-11-20 -1946-01-18 -1946-02-02 -1946-02-04 -1946-04-29 -1946-05-11 -1947-01-05 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-02-15 -1947-03-03 -1947-03-26 -1947-05-17 -1947-06-25 -1947-10-11 -1947-12-28 -1948-01-27 -1948-05-20 -1948-06-17 -1948-12-27 -1949-06-26 -1949-09-05 -1949-09-20 -1950-01-23 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-19 -1950-02-23 -1950-04-10 -1950-09-27 -1951-03-31 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-08-10 -1951-09-06 -1952-02-06 -1952-02-13 -1952-06-23 -1953-01-10 -1953-02-10 -1953-02-23 -1953-02-23 -1953-02-23 -1953-02-23 -1953-02-23 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-04-03 -1953-05-19 -1953-07-23 -1953-11-27 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-04 -1953-12-25 -1954-02-21 -1954-05-19 -1954-07-04 -1954-07-07 -1954-10-17 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-11-25 -1954-12-25 -1955-05-10 -1955-07-22 -1955-08-07 
-1955-08-24 -1955-09-23 -1955-11-23 -1956-05-18 -1956-08-06 -1956-10-09 -1957-04-20 -1957-05-03 -1957-08-15 -1957-08-24 -1957-09-05 -1958-07-25 -1958-07-25 -1958-07-25 -1958-07-25 -1958-07-25 -1958-10-13 -1958-10-24 -1959-01-13 -1959-01-24 -1959-03-22 -1959-04-30 -1959-09-08 -1959-09-23 -1959-11-23 -1959-12-21 -1960-01-16 -1960-03-18 -1960-04-05 -1960-07-17 -1960-07-25 -1960-08-30 -1960-11-25 -1961-05-15 -1961-07-10 -1961-07-29 -1961-07-29 -1961-09-25 -1961-10-15 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1961-12-18 -1962-06-07 -1962-07-08 -1962-08-18 -1962-09-02 -1963-01-08 -1963-03-31 +1940-03-26 +1940-04-27 +1940-04-30 +1940-08-06 +1940-08-17 +1940-09-23 +1941-03-23 +1941-04-18 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-09-15 +1941-11-06 +1942-02-15 +1942-03-22 +1943-01-30 +1943-03-29 +1943-05-07 +1943-11-27 +1944-02-27 +1944-05-05 +1945-09-25 +1945-10-07 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1945-11-19 +1946-01-17 +1946-02-01 +1946-02-03 +1946-04-28 +1946-05-10 +1947-01-04 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-02-14 +1947-03-02 +1947-03-25 +1947-05-16 +1947-06-24 +1947-10-10 +1947-12-27 +1948-01-26 +1948-05-19 +1948-06-16 +1948-12-26 +1949-06-25 +1949-09-04 +1949-09-19 +1950-01-22 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-18 +1950-02-22 +1950-04-09 +1950-09-26 +1951-03-30 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-08-09 +1951-09-05 +1952-02-05 +1952-02-12 +1952-06-22 +1953-01-09 +1953-02-09 +1953-02-22 +1953-02-22 +1953-02-22 +1953-02-22 +1953-02-22 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-04-02 +1953-05-18 +1953-07-22 +1953-11-26 +1953-12-03 +1953-12-03 +1953-12-03 +1953-12-03 +1953-12-03 +1953-12-03 +1953-12-03 +1953-12-24 +1954-02-20 +1954-05-18 +1954-07-03 +1954-07-06 +1954-10-16 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-11-24 +1954-12-24 +1955-05-09 +1955-07-21 +1955-08-06 +1955-08-23 +1955-09-22 +1955-11-22 +1956-05-17 +1956-08-05 +1956-10-08 +1957-04-19 +1957-05-02 +1957-08-14 +1957-08-23 +1957-09-04 +1958-07-24 +1958-07-24 +1958-07-24 +1958-07-24 +1958-07-24 +1958-10-12 +1958-10-23 +1959-01-12 +1959-01-23 +1959-03-21 +1959-04-29 +1959-09-07 +1959-09-22 +1959-11-22 +1959-12-20 +1960-01-15 +1960-03-17 +1960-04-04 +1960-07-16 +1960-07-24 +1960-08-29 +1960-11-24 +1961-05-14 +1961-07-09 +1961-07-28 +1961-07-28 +1961-09-24 +1961-10-14 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1961-12-17 +1962-06-06 +1962-07-07 +1962-08-17 +1962-09-01 +1963-01-07 +1963-03-30 +1964-04-14 1964-04-15 -1964-04-16 -1964-10-20 -1964-10-30 -1964-11-08 -1964-12-14 -1965-03-20 -1965-03-27 -1965-05-04 -1965-06-01 -1965-09-19 -1966-02-15 -1966-08-17 -1966-12-01 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1966-12-29 -1967-03-22 -1967-04-09 -1967-08-09 -1967-09-05 -1968-02-16 +1964-10-19 +1964-10-29 +1964-11-07 +1964-12-13 +1965-03-19 +1965-03-26 +1965-05-03 +1965-05-31 +1965-09-18 +1966-02-14 +1966-08-16 +1966-11-30 +1966-12-28 +1966-12-28 
+1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1966-12-28 +1967-03-21 +1967-04-08 +1967-08-08 +1967-09-04 +1968-02-15 1971-09-22 1971-09-25 1972-02-25 Index: ql/src/test/results/clientpositive/vector_decimal_round.q.out =================================================================== --- ql/src/test/results/clientpositive/vector_decimal_round.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/vector_decimal_round.q.out (working copy) @@ -102,17 +102,17 @@ alias: decimal_tbl_txt Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) - outputColumnNames: _col0, _col1 + expressions: dec (type: decimal(10,0)) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, (- 1)) (type: decimal(11,0)) sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,0)), _col1 (type: decimal(11,0)) + value expressions: _col0 (type: decimal(10,0)) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(10,0)), VALUE._col1 (type: decimal(11,0)) + expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -238,17 +238,17 @@ alias: decimal_tbl_rc Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) - outputColumnNames: _col0, _col1 + expressions: dec (type: decimal(10,0)) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, (- 1)) (type: decimal(11,0)) sort order: + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,0)), _col1 (type: decimal(11,0)) + value expressions: _col0 (type: decimal(10,0)) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(10,0)), VALUE._col1 (type: decimal(11,0)) + expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -375,18 +375,18 @@ alias: decimal_tbl_orc Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) - outputColumnNames: _col0, _col1 + expressions: dec (type: decimal(10,0)) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: round(_col0, (- 1)) (type: decimal(11,0)) sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(10,0)), _col1 (type: decimal(11,0)) + value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(10,0)), VALUE._col1 (type: decimal(11,0)) + expressions: VALUE._col0 (type: decimal(10,0)), KEY.reducesinkkey0 (type: decimal(11,0)) outputColumnNames: 
_col0, _col1 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out =================================================================== --- ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out (working copy) @@ -77,11 +77,11 @@ Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$INTNAME1 + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME1 + $INTNAME1 TableScan HashTable Sink Operator keys: @@ -114,11 +114,11 @@ Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$INTNAME + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME + $INTNAME TableScan HashTable Sink Operator keys: @@ -188,11 +188,11 @@ Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_1:lineitem + $hdt$_1:lineitem Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_1:lineitem + $hdt$_1:lineitem TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE @@ -342,11 +342,11 @@ Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$INTNAME1 + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME1 + $INTNAME1 TableScan HashTable Sink Operator keys: @@ -379,11 +379,11 @@ Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$INTNAME + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME + $INTNAME TableScan HashTable Sink Operator keys: @@ -453,11 +453,11 @@ Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_1:lineitem + $hdt$_1:lineitem Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_1:lineitem + $hdt$_1:lineitem TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Index: ql/src/test/results/clientpositive/vectorized_parquet_types.q.out =================================================================== --- ql/src/test/results/clientpositive/vectorized_parquet_types.q.out (revision 0) +++ ql/src/test/results/clientpositive/vectorized_parquet_types.q.out (working copy) @@ -0,0 +1,378 @@ +PREHOOK: query: DROP TABLE parquet_types_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_types_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE parquet_types +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_types +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- init +CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 array, + st1 struct, + d date, + cdecimal decimal(4,2) +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: -- init +CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 
array, + st1 struct, + d date, + cdecimal decimal(4,2) +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary binary, + cdecimal decimal(4,2) +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_types +POSTHOOK: query: CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary binary, + cdecimal decimal(4,2) +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_types +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), cdecimal FROM parquet_types_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_types +POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), cdecimal FROM parquet_types_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_types +POSTHOOK: Lineage: parquet_types.cbinary EXPRESSION [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cbinary, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_types.cchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cchar, type:char(5), comment:null), ] +POSTHOOK: Lineage: parquet_types.cdecimal SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdecimal, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ] +PREHOOK: query: -- select +explain +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types +PREHOOK: type: QUERY +POSTHOOK: query: -- select +explain +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), t (type: timestamp), cchar (type: char(5)), cvarchar (type: varchar(10)), hex(cbinary) (type: string), cdecimal (type: decimal(4,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111 a a B4F3CAFDBEDD 48.88 +101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222 ab ab 68692CCAC0BDE7 8.72 +102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333 abc abc B4F3CAFDBEDD 90.21 +103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444 abcd abcd 68692CCAC0BDE7 3.89 +104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555 abcde abcde B4F3CAFDBEDD 56.23 +105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666 abcde abcdef 68692CCAC0BDE7 90.21 +106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777 abcde abcdefg B4F3CAFDBEDD 6.09 +107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888 bcdef abcdefgh 68692CCAC0BDE7 9.44 +108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999 cdefg B4F3CAFDBE 68656C6C6F 77.54 +109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101010101 klmno abcdedef 68692CCAC0BDE7 25.42 +110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111 pqrst abcdede B4F3CAFDBEDD 60.12 +111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121 nopqr abcded 68692CCAC0BDE7 49.56 +112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131 opqrs abcdd B4F3CAFDBEDD 80.76 +113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141 pqrst abc 68692CCAC0BDE7 23.23 +114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151 qrstu b B4F3CAFDBEDD 1.01 +115 1 1 1.0 4.5 qrs 2026-04-04 16:16:16.161616161 rstuv 
abcded 68692CCAC0BDE7 5.98 +116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171 stuvw abcded B4F3CAFDBEDD 11.22 +117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181 tuvwx abcded 68692CCAC0BDE7 9.88 +118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191 uvwzy abcdede B4F3CAFDBEDD 4.76 +119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede 68692CCAC0BDE7 12.83 +120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde B4F3CAFDBEDD 73.04 +121 1 2 1.1 6.3 lmn 2032-10-10 22:22:22.222222222 bcdef abcde 90.33 +PREHOOK: query: explain +SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cchar (type: char(5)), length(cchar) (type: int), cvarchar (type: varchar(10)), length(cvarchar) (type: int), cdecimal (type: decimal(4,2)), sign(cdecimal) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +a 1 a 3 48.88 1 +ab 2 ab 3 8.72 1 +abc 3 abc 3 90.21 1 +abcd 4 abcd 4 3.89 1 +abcde 5 abcde 5 56.23 1 +abcde 5 abcdef 6 90.21 1 +abcde 5 abcdefg 7 6.09 1 +bcdef 5 abcdefgh 8 9.44 1 +cdefg 5 B4F3CAFDBE 10 77.54 1 +klmno 5 abcdedef 8 25.42 1 +pqrst 5 abcdede 7 60.12 1 +nopqr 5 abcded 6 49.56 1 +opqrs 5 abcdd 5 80.76 1 +pqrst 5 abc 3 23.23 1 +qrstu 5 b 1 1.01 1 +rstuv 5 abcded 6 5.98 1 +stuvw 5 abcded 6 11.22 1 +tuvwx 5 abcded 6 9.88 1 +uvwzy 5 abcdede 7 4.76 1 +vwxyz 5 abcdede 7 12.83 1 +wxyza 5 abcde 5 73.04 1 +bcdef 5 abcde 5 90.33 1 +PREHOOK: query: explain +SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), cdecimal (type: decimal(4,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col1), min(_col2), count(_col3), avg(_col4), stddev_pop(_col5), max(_col6) + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2)) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: decimal(4,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +1 121 1 8 1.1749999970197678 
2.0621590627301285 90.33 +2 119 1 7 1.2142857142857142 1.8 60.12 +3 120 1 7 1.171428578240531 1.7999999999999996 90.21 Index: ql/src/test/results/clientpositive/windowing_streaming.q.out =================================================================== --- ql/src/test/results/clientpositive/windowing_streaming.q.out (revision 1673556) +++ ql/src/test/results/clientpositive/windowing_streaming.q.out (working copy) @@ -65,43 +65,39 @@ TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string + output shape: _col1: string, _col2: string type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 - partition by: _col1 + order by: _col1 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col0 + arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _wcol0 (type: int) + expressions: _col2 (type: string), _wcol0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -139,37 +135,33 @@ TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.8 + TopN Hash Memory Usage: 0.8 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string + output shape: _col1: string, _col2: string type: WINDOWING Windowing 
table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 - partition by: _col1 + order by: _col1 + partition by: _col2 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col0 + arguments: _col1 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) @@ -179,7 +171,7 @@ predicate: (_wcol0 < 4) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _wcol0 (type: int) + expressions: _col2 (type: string), _wcol0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -312,37 +304,33 @@ TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), cdouble (type: double) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: ctinyint (type: tinyint), cdouble (type: double) + sort order: ++ + Map-reduce partition columns: ctinyint (type: tinyint) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.8 + TopN Hash Memory Usage: 0.8 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col5 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: tinyint, _col1: double + output shape: _col0: tinyint, _col5: double type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 + order by: _col5 partition by: _col0 raw input shape: window functions: window function definition alias: _wcol0 - arguments: _col1 + arguments: _col5 name: rank window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) @@ -352,7 +340,7 @@ predicate: (_wcol0 < 5) (type: boolean) Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: tinyint), _col1 (type: double), _wcol0 (type: int) + expressions: _col0 (type: tinyint), _col5 (type: double), _wcol0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE File Output Operator Index: serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/test/ThriftTestObj.java =================================================================== --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/test/ThriftTestObj.java (revision 1673556) +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/test/ThriftTestObj.java (working copy) @@ -528,7 +528,7 @@ struct.field3 = new ArrayList(_list0.size); for (int _i1 = 0; _i1 < _list0.size; ++_i1) { - InnerStruct _elem2; // required + InnerStruct _elem2; // optional _elem2 = new InnerStruct(); _elem2.read(iprot); struct.field3.add(_elem2); @@ -636,7 +636,7 @@ struct.field3 = new ArrayList(_list5.size); for (int _i6 = 0; 
_i6 < _list5.size; ++_i6) { - InnerStruct _elem7; // required + InnerStruct _elem7; // optional _elem7 = new InnerStruct(); _elem7.read(iprot); struct.field3.add(_elem7); Index: serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java =================================================================== --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java (revision 1673556) +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java (working copy) @@ -1211,7 +1211,7 @@ struct.lint = new ArrayList(_list18.size); for (int _i19 = 0; _i19 < _list18.size; ++_i19) { - int _elem20; // required + int _elem20; // optional _elem20 = iprot.readI32(); struct.lint.add(_elem20); } @@ -1229,7 +1229,7 @@ struct.lString = new ArrayList(_list21.size); for (int _i22 = 0; _i22 < _list21.size; ++_i22) { - String _elem23; // required + String _elem23; // optional _elem23 = iprot.readString(); struct.lString.add(_elem23); } @@ -1247,7 +1247,7 @@ struct.lintString = new ArrayList(_list24.size); for (int _i25 = 0; _i25 < _list24.size; ++_i25) { - IntString _elem26; // required + IntString _elem26; // optional _elem26 = new IntString(); _elem26.read(iprot); struct.lintString.add(_elem26); @@ -1610,7 +1610,7 @@ struct.lint = new ArrayList(_list57.size); for (int _i58 = 0; _i58 < _list57.size; ++_i58) { - int _elem59; // required + int _elem59; // optional _elem59 = iprot.readI32(); struct.lint.add(_elem59); } @@ -1623,7 +1623,7 @@ struct.lString = new ArrayList(_list60.size); for (int _i61 = 0; _i61 < _list60.size; ++_i61) { - String _elem62; // required + String _elem62; // optional _elem62 = iprot.readString(); struct.lString.add(_elem62); } @@ -1636,7 +1636,7 @@ struct.lintString = new ArrayList(_list63.size); for (int _i64 = 0; _i64 < _list63.size; ++_i64) { - IntString _elem65; // required + IntString _elem65; // optional _elem65 = new IntString(); _elem65.read(iprot); struct.lintString.add(_elem65); Index: serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/MegaStruct.java =================================================================== --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/MegaStruct.java (revision 1673556) +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/MegaStruct.java (working copy) @@ -2280,7 +2280,7 @@ _val19 = new ArrayList(_list20.size); for (int _i21 = 0; _i21 < _list20.size; ++_i21) { - String _elem22; // required + String _elem22; // optional _elem22 = iprot.readString(); _val19.add(_elem22); } @@ -2310,7 +2310,7 @@ _val26 = new ArrayList(_list27.size); for (int _i28 = 0; _i28 < _list27.size; ++_i28) { - MiniStruct _elem29; // required + MiniStruct _elem29; // optional _elem29 = new MiniStruct(); _elem29.read(iprot); _val26.add(_elem29); @@ -2333,7 +2333,7 @@ struct.my_stringlist = new ArrayList(_list30.size); for (int _i31 = 0; _i31 < _list30.size; ++_i31) { - String _elem32; // required + String _elem32; // optional _elem32 = iprot.readString(); struct.my_stringlist.add(_elem32); } @@ -2351,7 +2351,7 @@ struct.my_structlist = new ArrayList(_list33.size); for (int _i34 = 0; _i34 < _list33.size; ++_i34) { - MiniStruct _elem35; // required + MiniStruct _elem35; // optional _elem35 = new MiniStruct(); _elem35.read(iprot); struct.my_structlist.add(_elem35); @@ -2370,7 +2370,7 @@ struct.my_enumlist = new ArrayList(_list36.size); for (int _i37 = 0; _i37 < _list36.size; ++_i37) { - MyEnum _elem38; // 
required + MyEnum _elem38; // optional _elem38 = MyEnum.findByValue(iprot.readI32()); struct.my_enumlist.add(_elem38); } @@ -2388,7 +2388,7 @@ struct.my_stringset = new HashSet(2*_set39.size); for (int _i40 = 0; _i40 < _set39.size; ++_i40) { - String _elem41; // required + String _elem41; // optional _elem41 = iprot.readString(); struct.my_stringset.add(_elem41); } @@ -2406,7 +2406,7 @@ struct.my_enumset = new HashSet(2*_set42.size); for (int _i43 = 0; _i43 < _set42.size; ++_i43) { - MyEnum _elem44; // required + MyEnum _elem44; // optional _elem44 = MyEnum.findByValue(iprot.readI32()); struct.my_enumset.add(_elem44); } @@ -2424,7 +2424,7 @@ struct.my_structset = new HashSet(2*_set45.size); for (int _i46 = 0; _i46 < _set45.size; ++_i46) { - MiniStruct _elem47; // required + MiniStruct _elem47; // optional _elem47 = new MiniStruct(); _elem47.read(iprot); struct.my_structset.add(_elem47); @@ -3023,7 +3023,7 @@ _val95 = new ArrayList(_list96.size); for (int _i97 = 0; _i97 < _list96.size; ++_i97) { - String _elem98; // required + String _elem98; // optional _elem98 = iprot.readString(); _val95.add(_elem98); } @@ -3047,7 +3047,7 @@ _val102 = new ArrayList(_list103.size); for (int _i104 = 0; _i104 < _list103.size; ++_i104) { - MiniStruct _elem105; // required + MiniStruct _elem105; // optional _elem105 = new MiniStruct(); _elem105.read(iprot); _val102.add(_elem105); @@ -3064,7 +3064,7 @@ struct.my_stringlist = new ArrayList(_list106.size); for (int _i107 = 0; _i107 < _list106.size; ++_i107) { - String _elem108; // required + String _elem108; // optional _elem108 = iprot.readString(); struct.my_stringlist.add(_elem108); } @@ -3077,7 +3077,7 @@ struct.my_structlist = new ArrayList(_list109.size); for (int _i110 = 0; _i110 < _list109.size; ++_i110) { - MiniStruct _elem111; // required + MiniStruct _elem111; // optional _elem111 = new MiniStruct(); _elem111.read(iprot); struct.my_structlist.add(_elem111); @@ -3091,7 +3091,7 @@ struct.my_enumlist = new ArrayList(_list112.size); for (int _i113 = 0; _i113 < _list112.size; ++_i113) { - MyEnum _elem114; // required + MyEnum _elem114; // optional _elem114 = MyEnum.findByValue(iprot.readI32()); struct.my_enumlist.add(_elem114); } @@ -3104,7 +3104,7 @@ struct.my_stringset = new HashSet(2*_set115.size); for (int _i116 = 0; _i116 < _set115.size; ++_i116) { - String _elem117; // required + String _elem117; // optional _elem117 = iprot.readString(); struct.my_stringset.add(_elem117); } @@ -3117,7 +3117,7 @@ struct.my_enumset = new HashSet(2*_set118.size); for (int _i119 = 0; _i119 < _set118.size; ++_i119) { - MyEnum _elem120; // required + MyEnum _elem120; // optional _elem120 = MyEnum.findByValue(iprot.readI32()); struct.my_enumset.add(_elem120); } @@ -3130,7 +3130,7 @@ struct.my_structset = new HashSet(2*_set121.size); for (int _i122 = 0; _i122 < _set121.size; ++_i122) { - MiniStruct _elem123; // required + MiniStruct _elem123; // optional _elem123 = new MiniStruct(); _elem123.read(iprot); struct.my_structset.add(_elem123); Index: serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/PropValueUnion.java =================================================================== --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/PropValueUnion.java (revision 1673556) +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/PropValueUnion.java (working copy) @@ -300,7 +300,7 @@ lString = new ArrayList(_list0.size); for (int _i1 = 0; _i1 < _list0.size; ++_i1) { - String _elem2; // required + 
String _elem2; // optional _elem2 = iprot.readString(); lString.add(_elem2); } @@ -423,7 +423,7 @@ lString = new ArrayList(_list9.size); for (int _i10 = 0; _i10 < _list9.size; ++_i10) { - String _elem11; // required + String _elem11; // optional _elem11 = iprot.readString(); lString.add(_elem11); } Index: serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/SetIntString.java =================================================================== --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/SetIntString.java (revision 1673556) +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/SetIntString.java (working copy) @@ -431,7 +431,7 @@ struct.sIntString = new HashSet(2*_set82.size); for (int _i83 = 0; _i83 < _set82.size; ++_i83) { - IntString _elem84; // required + IntString _elem84; // optional _elem84 = new IntString(); _elem84.read(iprot); struct.sIntString.add(_elem84); @@ -530,7 +530,7 @@ struct.sIntString = new HashSet(2*_set87.size); for (int _i88 = 0; _i88 < _set87.size; ++_i88) { - IntString _elem89; // required + IntString _elem89; // optional _elem89 = new IntString(); _elem89.read(iprot); struct.sIntString.add(_elem89); Index: serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java (working copy) @@ -19,11 +19,13 @@ package org.apache.hadoop.hive.serde2; import java.io.IOException; +import java.util.Arrays; import org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream; import org.apache.hadoop.hive.common.io.NonSyncByteArrayOutputStream; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.ByteStream.Output; /** * Extensions to bytearrayinput/output streams. @@ -90,14 +92,32 @@ } @Override + public void writeByte(long offset, byte value) { + getData()[(int) offset] = value; + } + + @Override public void reserve(int byteCount) { for (int i = 0; i < byteCount; ++i) { write(0); } } + + public boolean arraysEquals(Output output) { + if (count != output.count) { + return false; + } + for (int i = 0; i < count; i++) { + if (buf[i] != output.buf[i]) { + return false; + } + } + return true; + } } public static interface RandomAccessOutput { + public void writeByte(long offset, byte value); public void writeInt(long offset, int value); public void reserve(int byteCount); public void write(int b); Index: serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java (working copy) @@ -19,11 +19,14 @@ package org.apache.hadoop.hive.serde2; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.StringUtils; +import com.google.common.collect.Lists; + /** * ColumnProjectionUtils. 
* @@ -151,6 +154,14 @@ return result; } + public static List getReadColumnNames(Configuration conf) { + String colNames = conf.get(READ_COLUMN_NAMES_CONF_STR, READ_COLUMN_IDS_CONF_STR_DEFAULT); + if (colNames != null && !colNames.isEmpty()) { + return Arrays.asList(colNames.split(",")); + } + return Lists.newArrayList(); + } + private static void setReadColumnIDConf(Configuration conf, String id) { if (id.trim().isEmpty()) { conf.set(READ_COLUMN_IDS_CONF_STR, READ_COLUMN_IDS_CONF_STR_DEFAULT); Index: serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java (working copy) @@ -41,72 +41,93 @@ private final long offsetMask; private final long maxSize; - private byte[] currentWriteBuffer; - private int currentWriteBufferIndex; - /** The offset in the last writeBuffer where the values are added */ - private int currentWriteOffset = 0; + public static class Position { + private byte[] buffer = null; + private int bufferIndex = 0; + private int offset = 0; + public void clear() { + buffer = null; + bufferIndex = offset = -1; + } + } - private byte[] currentReadBuffer = null; - private int currentReadBufferIndex = 0; - private int currentReadOffset = 0; + Position writePos = new Position(); // Position where we'd write + Position defaultReadPos = new Position(); // Position where we'd read (by default). + public WriteBuffers(int wbSize, long maxSize) { this.wbSize = Integer.bitCount(wbSize) == 1 ? wbSize : (Integer.highestOneBit(wbSize) << 1); this.wbSizeLog2 = 31 - Integer.numberOfLeadingZeros(this.wbSize); this.offsetMask = this.wbSize - 1; this.maxSize = maxSize; - currentWriteBufferIndex = -1; + writePos.bufferIndex = -1; nextBufferToWrite(); } public long readVLong() { - ponderNextBufferToRead(); - byte firstByte = currentReadBuffer[currentReadOffset++]; + return readVLong(defaultReadPos); + } + + public long readVLong(Position readPos) { + ponderNextBufferToRead(readPos); + byte firstByte = readPos.buffer[readPos.offset++]; int length = (byte) WritableUtils.decodeVIntSize(firstByte) - 1; if (length == 0) { return firstByte; } long i = 0; - if (isAllInOneReadBuffer(length)) { + if (isAllInOneReadBuffer(length, readPos)) { for (int idx = 0; idx < length; idx++) { - i = (i << 8) | (currentReadBuffer[currentReadOffset + idx] & 0xFF); + i = (i << 8) | (readPos.buffer[readPos.offset + idx] & 0xFF); } - currentReadOffset += length; + readPos.offset += length; } else { for (int idx = 0; idx < length; idx++) { - i = (i << 8) | (readNextByte() & 0xFF); + i = (i << 8) | (readNextByte(readPos) & 0xFF); } } return (WritableUtils.isNegativeVInt(firstByte) ? 
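The Position refactoring above keeps a (buffer, bufferIndex, offset) cursor per reader and writer. Because the constructor rounds wbSize up to a power of two, a flat long offset splits into a buffer index and an in-buffer offset with one shift and one mask, which is what wbSizeLog2 and offsetMask are for. A standalone sketch of that addressing (class and method names are illustrative, not from the patch):

class BufferAddressing {
  final int wbSize;       // power of two
  final int wbSizeLog2;
  final long offsetMask;

  BufferAddressing(int requestedSize) {
    // Round up to the next power of two, as the WriteBuffers constructor does.
    wbSize = Integer.bitCount(requestedSize) == 1
        ? requestedSize : Integer.highestOneBit(requestedSize) << 1;
    wbSizeLog2 = 31 - Integer.numberOfLeadingZeros(wbSize);
    offsetMask = wbSize - 1;
  }

  int bufferIndex(long offset) {
    return (int) (offset >>> wbSizeLog2);
  }

  int offsetInBuffer(long offset) {
    return (int) (offset & offsetMask);
  }

  public static void main(String[] args) {
    BufferAddressing a = new BufferAddressing(1000);   // rounds up to 1024
    long offset = 5000;
    // 5000 = 4 * 1024 + 904, so buffer 4 at position 904.
    System.out.println(a.bufferIndex(offset) + " / " + a.offsetInBuffer(offset));
  }
}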
(i ^ -1L) : i); } public void skipVLong() { - ponderNextBufferToRead(); - byte firstByte = currentReadBuffer[currentReadOffset++]; + skipVLong(defaultReadPos); + } + + public void skipVLong(Position readPos) { + ponderNextBufferToRead(readPos); + byte firstByte = readPos.buffer[readPos.offset++]; int length = (byte) WritableUtils.decodeVIntSize(firstByte); if (length > 1) { - currentReadOffset += (length - 1); + readPos.offset += (length - 1); } - int diff = currentReadOffset - wbSize; + int diff = readPos.offset - wbSize; while (diff >= 0) { - ++currentReadBufferIndex; - currentReadBuffer = writeBuffers.get(currentReadBufferIndex); - currentReadOffset = diff; - diff = currentReadOffset - wbSize; + ++readPos.bufferIndex; + readPos.buffer = writeBuffers.get(readPos.bufferIndex); + readPos.offset = diff; + diff = readPos.offset - wbSize; } } public void setReadPoint(long offset) { - currentReadBufferIndex = getBufferIndex(offset); - currentReadBuffer = writeBuffers.get(currentReadBufferIndex); - currentReadOffset = getOffset(offset); + setReadPoint(offset, defaultReadPos); } + public void setReadPoint(long offset, Position readPos) { + readPos.bufferIndex = getBufferIndex(offset); + readPos.buffer = writeBuffers.get(readPos.bufferIndex); + readPos.offset = getOffset(offset); + } + public int hashCode(long offset, int length) { - setReadPoint(offset); - if (isAllInOneReadBuffer(length)) { - int result = murmurHash(currentReadBuffer, currentReadOffset, length); - currentReadOffset += length; + return hashCode(offset, length, defaultReadPos); + } + + public int hashCode(long offset, int length, Position readPos) { + setReadPoint(offset, readPos); + if (isAllInOneReadBuffer(length, readPos)) { + int result = murmurHash(readPos.buffer, readPos.offset, length); + readPos.offset += length; return result; } @@ -114,26 +135,26 @@ byte[] bytes = new byte[length]; int destOffset = 0; while (destOffset < length) { - ponderNextBufferToRead(); - int toRead = Math.min(length - destOffset, wbSize - currentReadOffset); - System.arraycopy(currentReadBuffer, currentReadOffset, bytes, destOffset, toRead); - currentReadOffset += toRead; + ponderNextBufferToRead(readPos); + int toRead = Math.min(length - destOffset, wbSize - readPos.offset); + System.arraycopy(readPos.buffer, readPos.offset, bytes, destOffset, toRead); + readPos.offset += toRead; destOffset += toRead; } return murmurHash(bytes, 0, bytes.length); } - private byte readNextByte() { + private byte readNextByte(Position readPos) { // This method is inefficient. It's only used when something crosses buffer boundaries. 
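hashCode(offset, length, readPos) above hashes in place when the range fits in one buffer and otherwise copies the bytes into a temporary array first. The gather pattern on its own, for a range that may span several fixed-size buffers (method name and parameters are illustrative, not from the patch):

static byte[] gather(java.util.List<byte[]> buffers, int wbSize,
                     int bufferIndex, int offset, int length) {
  byte[] result = new byte[length];
  int copied = 0;
  while (copied < length) {
    if (offset >= wbSize) {            // current buffer exhausted, move on
      bufferIndex++;
      offset = 0;
    }
    int toCopy = Math.min(length - copied, wbSize - offset);
    System.arraycopy(buffers.get(bufferIndex), offset, result, copied, toCopy);
    offset += toCopy;
    copied += toCopy;
  }
  return result;
}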
- ponderNextBufferToRead(); - return currentReadBuffer[currentReadOffset++]; + ponderNextBufferToRead(readPos); + return readPos.buffer[readPos.offset++]; } - private void ponderNextBufferToRead() { - if (currentReadOffset >= wbSize) { - ++currentReadBufferIndex; - currentReadBuffer = writeBuffers.get(currentReadBufferIndex); - currentReadOffset = 0; + private void ponderNextBufferToRead(Position readPos) { + if (readPos.offset >= wbSize) { + ++readPos.bufferIndex; + readPos.buffer = writeBuffers.get(readPos.bufferIndex); + readPos.offset = 0; } } @@ -149,26 +170,26 @@ @Override public void reserve(int byteCount) { if (byteCount < 0) throw new AssertionError("byteCount must be non-negative"); - int currentWriteOffset = this.currentWriteOffset + byteCount; + int currentWriteOffset = writePos.offset + byteCount; while (currentWriteOffset > wbSize) { nextBufferToWrite(); currentWriteOffset -= wbSize; } - this.currentWriteOffset = currentWriteOffset; + writePos.offset = currentWriteOffset; } public void setWritePoint(long offset) { - currentWriteBufferIndex = getBufferIndex(offset); - currentWriteBuffer = writeBuffers.get(currentWriteBufferIndex); - currentWriteOffset = getOffset(offset); + writePos.bufferIndex = getBufferIndex(offset); + writePos.buffer = writeBuffers.get(writePos.bufferIndex); + writePos.offset = getOffset(offset); } @Override public void write(int b) { - if (currentWriteOffset == wbSize) { + if (writePos.offset == wbSize) { nextBufferToWrite(); } - currentWriteBuffer[currentWriteOffset++] = (byte)b; + writePos.buffer[writePos.offset++] = (byte)b; } @Override @@ -180,11 +201,11 @@ public void write(byte[] b, int off, int len) { int srcOffset = 0; while (srcOffset < len) { - int toWrite = Math.min(len - srcOffset, wbSize - currentWriteOffset); - System.arraycopy(b, srcOffset + off, currentWriteBuffer, currentWriteOffset, toWrite); - currentWriteOffset += toWrite; + int toWrite = Math.min(len - srcOffset, wbSize - writePos.offset); + System.arraycopy(b, srcOffset + off, writePos.buffer, writePos.offset, toWrite); + writePos.offset += toWrite; srcOffset += toWrite; - if (currentWriteOffset == wbSize) { + if (writePos.offset == wbSize) { nextBufferToWrite(); } } @@ -204,16 +225,16 @@ } private void nextBufferToWrite() { - if (currentWriteBufferIndex == (writeBuffers.size() - 1)) { + if (writePos.bufferIndex == (writeBuffers.size() - 1)) { if ((1 + writeBuffers.size()) * ((long)wbSize) > maxSize) { // We could verify precisely at write time, but just do approximate at allocation time. throw new RuntimeException("Too much memory used by write buffers"); } writeBuffers.add(new byte[wbSize]); } - ++currentWriteBufferIndex; - currentWriteBuffer = writeBuffers.get(currentWriteBufferIndex); - currentWriteOffset = 0; + ++writePos.bufferIndex; + writePos.buffer = writeBuffers.get(writePos.bufferIndex); + writePos.offset = 0; } /** Compares two parts of the buffer with each other. Does not modify readPoint. */ @@ -282,20 +303,61 @@ return true; } + /** + * Compares part of the buffer with a part of an external byte array. + * Does not modify readPoint. + */ + public boolean isEqual(byte[] left, int leftOffset, int leftLength, long rightOffset, int rightLength) { + if (rightLength != leftLength) { + return false; + } + int rightIndex = getBufferIndex(rightOffset), rightFrom = getOffset(rightOffset); + byte[] rightBuffer = writeBuffers.get(rightIndex); + if (rightFrom + rightLength <= wbSize) { + // TODO: allow using unsafe optionally. 
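The matching write path, write(byte[], off, len) together with nextBufferToWrite() above, fills the current fixed-size buffer, allocates the next one on demand, and refuses to grow past maxSize. A minimal standalone version of that loop (names are illustrative; the two-element cursor array stands in for the patch's write Position, and the list is assumed to already hold one wbSize buffer the cursor points into):

static void writeChunked(java.util.List<byte[]> buffers, int wbSize, long maxSize,
                         int[] cursor /* {bufferIndex, offsetInBuffer} */,
                         byte[] src, int off, int len) {
  int copied = 0;
  while (copied < len) {
    if (cursor[1] == wbSize) {         // current buffer is full
      if ((buffers.size() + 1) * (long) wbSize > maxSize) {
        throw new RuntimeException("Too much memory used by write buffers");
      }
      buffers.add(new byte[wbSize]);
      cursor[0]++;
      cursor[1] = 0;
    }
    int toCopy = Math.min(len - copied, wbSize - cursor[1]);
    System.arraycopy(src, off + copied, buffers.get(cursor[0]), cursor[1], toCopy);
    cursor[1] += toCopy;
    copied += toCopy;
  }
}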
+ for (int i = 0; i < leftLength; ++i) { + if (left[leftOffset + i] != rightBuffer[rightFrom + i]) { + return false; + } + } + return true; + } + for (int i = 0; i < rightLength; ++i) { + if (rightFrom == wbSize) { + ++rightIndex; + rightBuffer = writeBuffers.get(rightIndex); + rightFrom = 0; + } + if (left[leftOffset + i] != rightBuffer[rightFrom++]) { + return false; + } + } + return true; + } + public void clear() { writeBuffers.clear(); - currentWriteBuffer = currentReadBuffer = null; - currentWriteOffset = currentReadOffset = currentWriteBufferIndex = currentReadBufferIndex = 0; + clearState(); } + private void clearState() { + writePos.clear(); + defaultReadPos.clear(); + } + + public long getWritePoint() { - return ((long)currentWriteBufferIndex << wbSizeLog2) + currentWriteOffset; + return ((long)writePos.bufferIndex << wbSizeLog2) + writePos.offset; } public long getReadPoint() { - return ((long)currentReadBufferIndex << wbSizeLog2) + currentReadOffset; + return getReadPoint(defaultReadPos); } + public long getReadPoint(Position readPos) { + return (readPos.bufferIndex * (long)wbSize) + readPos.offset; + } + public void writeVLong(long value) { LazyBinaryUtils.writeVLong(this, value); } @@ -312,22 +374,22 @@ readBuffer = writeBuffers.get(readBufIndex); readBufOffset = 0; } - if (currentWriteOffset == wbSize) { + if (writePos.offset == wbSize) { nextBufferToWrite(); } // How much we can read from current read buffer, out of what we need. int toRead = Math.min(length - srcOffset, wbSize - readBufOffset); // How much we can write to current write buffer, out of what we need. - int toWrite = Math.min(toRead, wbSize - currentWriteOffset); - System.arraycopy(readBuffer, readBufOffset, currentWriteBuffer, currentWriteOffset, toWrite); - currentWriteOffset += toWrite; + int toWrite = Math.min(toRead, wbSize - writePos.offset); + System.arraycopy(readBuffer, readBufOffset, writePos.buffer, writePos.offset, toWrite); + writePos.offset += toWrite; readBufOffset += toWrite; srcOffset += toWrite; if (toRead > toWrite) { nextBufferToWrite(); toRead -= toWrite; // Remains to copy from current read buffer. Less than wbSize by def. 
- System.arraycopy(readBuffer, readBufOffset, currentWriteBuffer, currentWriteOffset, toRead); - currentWriteOffset += toRead; + System.arraycopy(readBuffer, readBufOffset, writePos.buffer, writePos.offset, toRead); + writePos.offset += toRead; readBufOffset += toRead; srcOffset += toRead; } @@ -404,58 +466,57 @@ } } - private boolean isAllInOneReadBuffer(int length) { - return currentReadOffset + length <= wbSize; + private boolean isAllInOneReadBuffer(int length, Position readPos) { + return readPos.offset + length <= wbSize; } private boolean isAllInOneWriteBuffer(int length) { - return currentWriteOffset + length <= wbSize; + return writePos.offset + length <= wbSize; } public void seal() { - if (currentWriteOffset < (wbSize * 0.8)) { // arbitrary - byte[] smallerBuffer = new byte[currentWriteOffset]; - System.arraycopy(currentWriteBuffer, 0, smallerBuffer, 0, currentWriteOffset); - writeBuffers.set(currentWriteBufferIndex, smallerBuffer); + if (writePos.offset < (wbSize * 0.8)) { // arbitrary + byte[] smallerBuffer = new byte[writePos.offset]; + System.arraycopy(writePos.buffer, 0, smallerBuffer, 0, writePos.offset); + writeBuffers.set(writePos.bufferIndex, smallerBuffer); } - if (currentWriteBufferIndex + 1 < writeBuffers.size()) { - writeBuffers.subList(currentWriteBufferIndex + 1, writeBuffers.size()).clear(); + if (writePos.bufferIndex + 1 < writeBuffers.size()) { + writeBuffers.subList(writePos.bufferIndex + 1, writeBuffers.size()).clear(); } - currentWriteBuffer = currentReadBuffer = null; // Make sure we don't reference any old buffer. - currentWriteBufferIndex = currentReadBufferIndex = -1; - currentReadOffset = currentWriteOffset = -1; + // Make sure we don't reference any old buffer. + clearState(); } - public long readFiveByteULong(long offset) { - return readNByteLong(offset, 5); + public long readNByteLong(long offset, int bytes) { + return readNByteLong(offset, bytes, defaultReadPos); } - private long readNByteLong(long offset, int bytes) { - setReadPoint(offset); + public long readNByteLong(long offset, int bytes, Position readPos) { + setReadPoint(offset, readPos); long v = 0; - if (isAllInOneReadBuffer(bytes)) { + if (isAllInOneReadBuffer(bytes, readPos)) { for (int i = 0; i < bytes; ++i) { - v = (v << 8) + (currentReadBuffer[currentReadOffset + i] & 0xff); + v = (v << 8) + (readPos.buffer[readPos.offset + i] & 0xff); } - currentReadOffset += bytes; + readPos.offset += bytes; } else { for (int i = 0; i < bytes; ++i) { - v = (v << 8) + (readNextByte() & 0xff); + v = (v << 8) + (readNextByte(readPos) & 0xff); } } return v; } public void writeFiveByteULong(long offset, long v) { - int prevIndex = currentWriteBufferIndex, prevOffset = currentWriteOffset; + int prevIndex = writePos.bufferIndex, prevOffset = writePos.offset; setWritePoint(offset); if (isAllInOneWriteBuffer(5)) { - currentWriteBuffer[currentWriteOffset] = (byte)(v >>> 32); - currentWriteBuffer[currentWriteOffset + 1] = (byte)(v >>> 24); - currentWriteBuffer[currentWriteOffset + 2] = (byte)(v >>> 16); - currentWriteBuffer[currentWriteOffset + 3] = (byte)(v >>> 8); - currentWriteBuffer[currentWriteOffset + 4] = (byte)(v); - currentWriteOffset += 5; + writePos.buffer[writePos.offset] = (byte)(v >>> 32); + writePos.buffer[writePos.offset + 1] = (byte)(v >>> 24); + writePos.buffer[writePos.offset + 2] = (byte)(v >>> 16); + writePos.buffer[writePos.offset + 3] = (byte)(v >>> 8); + writePos.buffer[writePos.offset + 4] = (byte)(v); + writePos.offset += 5; } else { setByte(offset++, (byte)(v >>> 32)); 
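writeFiveByteULong and writeInt above patch a value in big-endian order at an absolute offset and then restore the write cursor, so random-access fix-ups do not disturb sequential appends. On a single flat array the same layout reduces to the following (standalone sketch with illustrative names; in WriteBuffers the extra complication is that the bytes may straddle two buffers, which is why the slow path falls back to setByte per byte):

// Write v big-endian at 'offset' without touching any append cursor.
static void patchInt(byte[] buf, int offset, int v) {
  buf[offset]     = (byte) (v >> 24);
  buf[offset + 1] = (byte) (v >> 16);
  buf[offset + 2] = (byte) (v >> 8);
  buf[offset + 3] = (byte) v;
}

// Read it back, treating each byte as unsigned.
static int readInt(byte[] buf, int offset) {
  return ((buf[offset] & 0xff) << 24)
       | ((buf[offset + 1] & 0xff) << 16)
       | ((buf[offset + 2] & 0xff) << 8)
       |  (buf[offset + 3] & 0xff);
}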
setByte(offset++, (byte)(v >>> 24)); @@ -463,9 +524,9 @@ setByte(offset++, (byte)(v >>> 8)); setByte(offset, (byte)(v)); } - currentWriteBufferIndex = prevIndex; - currentWriteBuffer = writeBuffers.get(currentWriteBufferIndex); - currentWriteOffset = prevOffset; + writePos.bufferIndex = prevIndex; + writePos.buffer = writeBuffers.get(writePos.bufferIndex); + writePos.offset = prevOffset; } public int readInt(long offset) { @@ -474,25 +535,38 @@ @Override public void writeInt(long offset, int v) { - int prevIndex = currentWriteBufferIndex, prevOffset = currentWriteOffset; + int prevIndex = writePos.bufferIndex, prevOffset = writePos.offset; setWritePoint(offset); if (isAllInOneWriteBuffer(4)) { - currentWriteBuffer[currentWriteOffset] = (byte)(v >> 24); - currentWriteBuffer[currentWriteOffset + 1] = (byte)(v >> 16); - currentWriteBuffer[currentWriteOffset + 2] = (byte)(v >> 8); - currentWriteBuffer[currentWriteOffset + 3] = (byte)(v); - currentWriteOffset += 4; + writePos.buffer[writePos.offset] = (byte)(v >> 24); + writePos.buffer[writePos.offset + 1] = (byte)(v >> 16); + writePos.buffer[writePos.offset + 2] = (byte)(v >> 8); + writePos.buffer[writePos.offset + 3] = (byte)(v); + writePos.offset += 4; } else { setByte(offset++, (byte)(v >>> 24)); setByte(offset++, (byte)(v >>> 16)); setByte(offset++, (byte)(v >>> 8)); setByte(offset, (byte)(v)); } - currentWriteBufferIndex = prevIndex; - currentWriteBuffer = writeBuffers.get(currentWriteBufferIndex); - currentWriteOffset = prevOffset; + writePos.bufferIndex = prevIndex; + writePos.buffer = writeBuffers.get(writePos.bufferIndex); + writePos.offset = prevOffset; } + + @Override + public void writeByte(long offset, byte value) { + int prevIndex = writePos.bufferIndex, prevOffset = writePos.offset; + setWritePoint(offset); + // One byte is always available for writing. + writePos.buffer[writePos.offset] = value; + + writePos.bufferIndex = prevIndex; + writePos.buffer = writeBuffers.get(writePos.bufferIndex); + writePos.offset = prevOffset; + } + // Lifted from org.apache.hadoop.util.hash.MurmurHash... but supports offset. public static int murmurHash(byte[] data, int offset, int length) { int m = 0x5bd1e995; @@ -551,4 +625,8 @@ public long size() { return writeBuffers.size() * (long) wbSize; } + + public Position getReadPosition() { + return defaultReadPos; + } } \ No newline at end of file Index: serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java (working copy) @@ -189,7 +189,7 @@ String columnName = columnNames.get(i); Object datum = record.get(columnName); Schema datumSchema = record.getSchema().getField(columnName).schema(); - Schema.Field field = fileSchema.getField(columnName); + Schema.Field field = AvroSerdeUtils.isNullableType(fileSchema)?AvroSerdeUtils.getOtherTypeFromNullableType(fileSchema).getField(columnName):fileSchema.getField(columnName); objectRow.add(worker(datum, field == null ? 
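The AvroDeserializer change here looks the field up on the non-null branch when the file schema is a nullable union such as ["null", {record}]. With the plain Avro API the unwrapping amounts to the sketch below; it is a simplified stand-in for what AvroSerdeUtils.isNullableType and getOtherTypeFromNullableType do inside Hive, and the class name is made up.

import org.apache.avro.Schema;

public class NullableSchemaUnwrap {
  // Returns the non-null branch of a ["null", X] union, otherwise the schema itself.
  static Schema unwrapNullable(Schema schema) {
    if (schema.getType() == Schema.Type.UNION) {
      for (Schema branch : schema.getTypes()) {
        if (branch.getType() != Schema.Type.NULL) {
          return branch;
        }
      }
    }
    return schema;
  }
}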
null : field.schema(), datumSchema, columnType)); } Index: serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java (working copy) @@ -43,13 +43,15 @@ public class AvroObjectInspectorGenerator { final private List columnNames; final private List columnTypes; + final private List columnComments; final private ObjectInspector oi; public AvroObjectInspectorGenerator(Schema schema) throws SerDeException { verifySchemaIsARecord(schema); - this.columnNames = generateColumnNames(schema); + this.columnNames = AvroObjectInspectorGenerator.generateColumnNames(schema); this.columnTypes = SchemaToTypeInfo.generateColumnTypes(schema); + this.columnComments = AvroObjectInspectorGenerator.generateColumnComments(schema); assert columnNames.size() == columnTypes.size(); this.oi = createObjectInspector(); } @@ -80,7 +82,7 @@ for(int i = 0; i < columnNames.size(); i++) { columnOIs.add(i, createObjectInspectorWorker(columnTypes.get(i))); } - return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs); + return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs, columnComments); } private ObjectInspector createObjectInspectorWorker(TypeInfo ti) throws SerDeException { @@ -145,7 +147,7 @@ c.equals(ObjectInspector.Category.UNION); } - private List generateColumnNames(Schema schema) { + public static List generateColumnNames(Schema schema) { List fields = schema.getFields(); List fieldsList = new ArrayList(fields.size()); @@ -156,4 +158,15 @@ return fieldsList; } + public static List generateColumnComments(Schema schema) { + List fields = schema.getFields(); + List fieldComments = new ArrayList(fields.size()); + + for (Schema.Field field : fields) { + String fieldComment = field.doc() == null ? 
"" : field.doc(); + fieldComments.add(fieldComment); + } + + return fieldComments; + } } Index: serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java (working copy) @@ -133,7 +133,9 @@ if (columnCommentProperty == null || columnCommentProperty.isEmpty()) { columnComments = new ArrayList(); } else { - columnComments = Arrays.asList(columnCommentProperty.split(",")); + //Comments are separated by "\0" in columnCommentProperty, see method getSchema + //in MetaStoreUtils where this string columns.comments is generated + columnComments = Arrays.asList(columnCommentProperty.split("\0")); LOG.info("columnComments is " + columnCommentProperty); } if (columnNames.size() != columnTypes.size()) { Index: serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java (working copy) @@ -132,7 +132,7 @@ boolean[] columnSortOrderIsDesc; private static byte[] decimalBuffer = null; - private static Charset decimalCharSet = Charset.forName("US-ASCII"); + public static Charset decimalCharSet = Charset.forName("US-ASCII"); @Override public void initialize(Configuration conf, Properties tbl) @@ -572,7 +572,7 @@ return ((BaseCharTypeInfo)type).getLength(); } - static Text deserializeText(InputByteBuffer buffer, boolean invert, Text r) + public static Text deserializeText(InputByteBuffer buffer, boolean invert, Text r) throws IOException { // Get the actual length first int start = buffer.tell(); @@ -636,7 +636,7 @@ return serializeBytesWritable; } - private static void writeByte(RandomAccessOutput buffer, byte b, boolean invert) { + public static void writeByte(RandomAccessOutput buffer, byte b, boolean invert) { if (invert) { b = (byte) (0xff ^ b); } @@ -892,7 +892,7 @@ } - private static void serializeBytes( + public static void serializeBytes( ByteStream.Output buffer, byte[] data, int length, boolean invert) { for (int i = 0; i < length; i++) { if (data[i] == 0 || data[i] == 1) { @@ -905,14 +905,27 @@ writeByte(buffer, (byte) 0, invert); } - private static void serializeInt(ByteStream.Output buffer, int v, boolean invert) { + public static void serializeBytes( + ByteStream.Output buffer, byte[] data, int offset, int length, boolean invert) { + for (int i = offset; i < offset + length; i++) { + if (data[i] == 0 || data[i] == 1) { + writeByte(buffer, (byte) 1, invert); + writeByte(buffer, (byte) (data[i] + 1), invert); + } else { + writeByte(buffer, data[i], invert); + } + } + writeByte(buffer, (byte) 0, invert); + } + + public static void serializeInt(ByteStream.Output buffer, int v, boolean invert) { writeByte(buffer, (byte) ((v >> 24) ^ 0x80), invert); writeByte(buffer, (byte) (v >> 16), invert); writeByte(buffer, (byte) (v >> 8), invert); writeByte(buffer, (byte) v, invert); } - private static void serializeLong(ByteStream.Output buffer, long v, boolean invert) { + public static void serializeLong(ByteStream.Output buffer, long v, boolean invert) { writeByte(buffer, (byte) ((v >> 56) ^ 0x80), invert); writeByte(buffer, (byte) (v >> 48), invert); writeByte(buffer, (byte) (v >> 40), invert); 
Index: serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/InputByteBuffer.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/InputByteBuffer.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/InputByteBuffer.java (working copy) @@ -78,6 +78,10 @@ return end; } + public final boolean isEof() { + return (start >= end); + } + /** * Returns the underlying byte array. */ Index: serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java (working copy) @@ -0,0 +1,746 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.binarysortable.fast; + +import java.io.EOFException; +import java.io.IOException; +import java.math.BigInteger; +import java.util.Arrays; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.InputByteBuffer; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.Text; + +/* + * Directly deserialize with the caller reading field-by-field the LazyBinary serialization format. + * + * The caller is responsible for calling the read method for the right type of each field + * (after calling readCheckNull). 
+ * + * Reading some fields require a results object to receive value information. A separate + * results object is created by the caller at initialization per different field even for the same + * type. + * + * Some type values are by reference to either bytes in the deserialization buffer or to + * other type specific buffers. So, those references are only valid until the next time set is + * called. + */ +public class BinarySortableDeserializeRead implements DeserializeRead { + public static final Log LOG = LogFactory.getLog(BinarySortableDeserializeRead.class.getName()); + + private PrimitiveTypeInfo[] primitiveTypeInfos; + + // The sort order (ascending/descending) for each field. Set to true when descending (invert). + private boolean[] columnSortOrderIsDesc; + + // Which field we are on. We start with -1 so readCheckNull can increment once and the read + // field data methods don't increment. + private int fieldIndex; + + private int fieldCount; + + private int start; + + private DecimalTypeInfo saveDecimalTypeInfo; + private HiveDecimal saveDecimal; + + private byte[] tempDecimalBuffer; + private HiveDecimalWritable tempHiveDecimalWritable; + + private boolean readBeyondConfiguredFieldsWarned; + private boolean readBeyondBufferRangeWarned; + private boolean bufferRangeHasExtraDataWarned; + + private InputByteBuffer inputByteBuffer = new InputByteBuffer(); + + /* + * Use this constructor when only ascending sort order is used. + */ + public BinarySortableDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos) { + this(primitiveTypeInfos, null); + } + + public BinarySortableDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos, + boolean[] columnSortOrderIsDesc) { + this.primitiveTypeInfos = primitiveTypeInfos; + fieldCount = primitiveTypeInfos.length; + if (columnSortOrderIsDesc != null) { + this.columnSortOrderIsDesc = columnSortOrderIsDesc; + } else { + this.columnSortOrderIsDesc = new boolean[primitiveTypeInfos.length]; + Arrays.fill(this.columnSortOrderIsDesc, false); + } + inputByteBuffer = new InputByteBuffer(); + readBeyondConfiguredFieldsWarned = false; + readBeyondBufferRangeWarned = false; + bufferRangeHasExtraDataWarned = false; + } + + // Not public since we must have column information. + private BinarySortableDeserializeRead() { + } + + /* + * The primitive type information for all fields. + */ + public PrimitiveTypeInfo[] primitiveTypeInfos() { + return primitiveTypeInfos; + } + + /* + * Set the range of bytes to be deserialized. + */ + @Override + public void set(byte[] bytes, int offset, int length) { + fieldIndex = -1; + inputByteBuffer.reset(bytes, offset, offset + length); + start = offset; + } + + /* + * Reads the NULL information for a field. + * + * @return Returns true when the field is NULL; reading is positioned to the next field. + * Otherwise, false when the field is NOT NULL; reading is positioned to the field data. + */ + @Override + public boolean readCheckNull() throws IOException { + + // We start with fieldIndex as -1 so we can increment once here and then the read + // field data methods don't increment. + fieldIndex++; + + if (fieldIndex >= fieldCount) { + // Reading beyond the specified field count produces NULL. + if (!readBeyondConfiguredFieldsWarned) { + // Warn only once. + LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but " + + " reading more (NULLs returned). 
Ignoring similar problems."); + readBeyondConfiguredFieldsWarned = true; + } + return true; + } + if (inputByteBuffer.isEof()) { + // Also, reading beyond our byte range produces NULL. + if (!readBeyondBufferRangeWarned) { + // Warn only once. + int length = inputByteBuffer.tell() - start; + LOG.info("Reading beyond buffer range! Buffer range " + start + + " for length " + length + " but reading more... " + + "(total buffer length " + inputByteBuffer.getData().length + ")" + + " Ignoring similar problems."); + readBeyondBufferRangeWarned = true; + } + // We cannot read beyond so we must return NULL here. + return true; + } + byte isNull = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]); + + if (isNull == 0) { + return true; + } + + // We have a field and are positioned to it. + + if (primitiveTypeInfos[fieldIndex].getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { + return false; + } + + // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read + // it here. + return earlyReadHiveDecimal(); + } + + /* + * Call this method after all fields have been read to check for extra fields. + */ + public void extraFieldsCheck() { + if (!inputByteBuffer.isEof()) { + // We did not consume all of the byte range. + if (!bufferRangeHasExtraDataWarned) { + // Warn only once. + int length = inputByteBuffer.getEnd() - start; + int remaining = inputByteBuffer.getEnd() - inputByteBuffer.tell(); + LOG.info("Not all fields were read in the buffer range! Buffer range " + start + + " for length " + length + " but " + remaining + " bytes remain. " + + "(total buffer length " + inputByteBuffer.getData().length + ")" + + " Ignoring similar problems."); + bufferRangeHasExtraDataWarned = true; + } + } + } + + /* + * Read integrity warning flags. + */ + @Override + public boolean readBeyondConfiguredFieldsWarned() { + return readBeyondConfiguredFieldsWarned; + } + @Override + public boolean readBeyondBufferRangeWarned() { + return readBeyondBufferRangeWarned; + } + @Override + public boolean bufferRangeHasExtraDataWarned() { + return bufferRangeHasExtraDataWarned; + } + + /* + * BOOLEAN. + */ + @Override + public boolean readBoolean() throws IOException { + byte b = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]); + return (b == 2); + } + + /* + * BYTE. + */ + @Override + public byte readByte() throws IOException { + return (byte) (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) ^ 0x80); + } + + /* + * SHORT. + */ + @Override + public short readShort() throws IOException { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + return (short) v; + } + + /* + * INT. + */ + @Override + public int readInt() throws IOException { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + return v; + } + + /* + * LONG. + */ + @Override + public long readLong() throws IOException { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + long v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 7; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + return v; + } + + /* + * FLOAT. 
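The class comment above describes a strict caller protocol: check readCheckNull() before every field, then call the read method that matches that field's declared type. A sketch of that protocol for a two-column (bigint, int) row follows; it assumes the patched serde classes are on the classpath and uses only the constructor and primitive read methods shown in the patch, with made-up column types and a made-up class name.

import java.io.IOException;

import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReadTwoColumns {
  // Reads a (bigint, int) row image produced by the matching serialize side.
  public static void read(byte[] row, int offset, int length) throws IOException {
    PrimitiveTypeInfo[] types = new PrimitiveTypeInfo[] {
        TypeInfoFactory.getPrimitiveTypeInfo("bigint"),
        TypeInfoFactory.getPrimitiveTypeInfo("int") };
    BinarySortableDeserializeRead reader = new BinarySortableDeserializeRead(types);
    reader.set(row, offset, length);

    // readCheckNull() returns true for a NULL field and positions past it;
    // otherwise the caller must consume the field with the matching read call.
    Long first = reader.readCheckNull() ? null : reader.readLong();
    Integer second = reader.readCheckNull() ? null : reader.readInt();

    reader.extraFieldsCheck();  // warn (once) if bytes remain unread
    System.out.println(first + ", " + second);
  }
}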
+ */ + @Override + public float readFloat() throws IOException { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = 0; + for (int i = 0; i < 4; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + if ((v & (1 << 31)) == 0) { + // negative number, flip all bits + v = ~v; + } else { + // positive number, flip the first bit + v = v ^ (1 << 31); + } + return Float.intBitsToFloat(v); + } + + /* + * DOUBLE. + */ + @Override + public double readDouble() throws IOException { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + long v = 0; + for (int i = 0; i < 8; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + if ((v & (1L << 63)) == 0) { + // negative number, flip all bits + v = ~v; + } else { + // positive number, flip the first bit + v = v ^ (1L << 63); + } + return Double.longBitsToDouble(v); + } + + // This class is for internal use. + private static class BinarySortableReadStringResults extends ReadStringResults { + + // Use an org.apache.hadoop.io.Text object as a buffer to decode the BinarySortable + // format string into. + private Text text; + + public BinarySortableReadStringResults() { + super(); + text = new Text(); + } + } + + // Reading a STRING field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadStringResults createReadStringResults() { + return new BinarySortableReadStringResults(); + } + + + @Override + public void readString(ReadStringResults readStringResults) throws IOException { + BinarySortableReadStringResults binarySortableReadStringResults = + (BinarySortableReadStringResults) readStringResults; + + BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], binarySortableReadStringResults.text); + readStringResults.bytes = binarySortableReadStringResults.text.getBytes(); + readStringResults.start = 0; + readStringResults.length = binarySortableReadStringResults.text.getLength(); + } + + + /* + * CHAR. + */ + + // This class is for internal use. + private static class BinarySortableReadHiveCharResults extends ReadHiveCharResults { + + public BinarySortableReadHiveCharResults() { + super(); + } + + public HiveCharWritable getHiveCharWritable() { + return hiveCharWritable; + } + } + + // Reading a CHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different CHAR field. + @Override + public ReadHiveCharResults createReadHiveCharResults() { + return new BinarySortableReadHiveCharResults(); + } + + public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException { + BinarySortableReadHiveCharResults binarySortableReadHiveCharResults = + (BinarySortableReadHiveCharResults) readHiveCharResults; + + if (!binarySortableReadHiveCharResults.isInit()) { + binarySortableReadHiveCharResults.init((CharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + HiveCharWritable hiveCharWritable = binarySortableReadHiveCharResults.getHiveCharWritable(); + + // Decode the bytes into our Text buffer, then truncate. 
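readFloat() and readDouble() above undo an order-preserving bit transform: on the write side, negative values have every bit flipped and positive values only the sign bit, so the resulting big-endian bytes sort the same way the floating-point values do. A standalone check of both directions (class and method names are illustrative, not from the patch):

public class SortableFloatBits {
  static int toSortableBits(float f) {
    int v = Float.floatToIntBits(f);
    // Negative values: flip every bit; positive values: flip only the sign bit.
    return (v & (1 << 31)) != 0 ? ~v : v ^ (1 << 31);
  }

  static float fromSortableBits(int v) {
    // Exact inverse of the transform above; this is what readFloat() computes.
    return Float.intBitsToFloat((v & (1 << 31)) == 0 ? ~v : v ^ (1 << 31));
  }

  public static void main(String[] args) {
    float[] ordered = { Float.NEGATIVE_INFINITY, -3.5f, -0.0f, 0.0f, 1.25f, 7.0f };
    for (int i = 1; i < ordered.length; i++) {
      int prev = toSortableBits(ordered[i - 1]);
      int cur = toSortableBits(ordered[i]);
      // Unsigned comparison of the transformed bits preserves float order,
      // which is what makes the serialized bytes comparable with memcmp.
      if (Integer.compareUnsigned(prev, cur) > 0) {
        throw new AssertionError("order not preserved");
      }
      if (fromSortableBits(cur) != ordered[i]) {
        throw new AssertionError("round trip failed");
      }
    }
    System.out.println("float order preserved");
  }
}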
+ BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], hiveCharWritable.getTextValue()); + hiveCharWritable.enforceMaxLength(binarySortableReadHiveCharResults.getMaxLength()); + + readHiveCharResults.bytes = hiveCharWritable.getTextValue().getBytes(); + readHiveCharResults.start = 0; + readHiveCharResults.length = hiveCharWritable.getTextValue().getLength(); + } + + /* + * VARCHAR. + */ + + // This class is for internal use. + private static class BinarySortableReadHiveVarcharResults extends ReadHiveVarcharResults { + + public BinarySortableReadHiveVarcharResults() { + super(); + } + + public HiveVarcharWritable getHiveVarcharWritable() { + return hiveVarcharWritable; + } + } + + // Reading a VARCHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different VARCHAR field. + @Override + public ReadHiveVarcharResults createReadHiveVarcharResults() { + return new BinarySortableReadHiveVarcharResults(); + } + + public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException { + BinarySortableReadHiveVarcharResults binarySortableReadHiveVarcharResults = (BinarySortableReadHiveVarcharResults) readHiveVarcharResults; + + if (!binarySortableReadHiveVarcharResults.isInit()) { + binarySortableReadHiveVarcharResults.init((VarcharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + HiveVarcharWritable hiveVarcharWritable = binarySortableReadHiveVarcharResults.getHiveVarcharWritable(); + + // Decode the bytes into our Text buffer, then truncate. + BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], hiveVarcharWritable.getTextValue()); + hiveVarcharWritable.enforceMaxLength(binarySortableReadHiveVarcharResults.getMaxLength()); + + readHiveVarcharResults.bytes = hiveVarcharWritable.getTextValue().getBytes(); + readHiveVarcharResults.start = 0; + readHiveVarcharResults.length = hiveVarcharWritable.getTextValue().getLength(); + } + + /* + * BINARY. + */ + + // This class is for internal use. + private static class BinarySortableReadBinaryResults extends ReadBinaryResults { + + // Use an org.apache.hadoop.io.Text object as a buffer to decode the BinarySortable + // format string into. + private Text text; + + public BinarySortableReadBinaryResults() { + super(); + text = new Text(); + } + } + + // Reading a BINARY field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadBinaryResults createReadBinaryResults() { + return new BinarySortableReadBinaryResults(); + } + + @Override + public void readBinary(ReadBinaryResults readBinaryResults) throws IOException { + BinarySortableReadBinaryResults binarySortableReadBinaryResults = + (BinarySortableReadBinaryResults) readBinaryResults; + + BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], binarySortableReadBinaryResults.text); + readBinaryResults.bytes = binarySortableReadBinaryResults.text.getBytes(); + readBinaryResults.start = 0; + readBinaryResults.length = binarySortableReadBinaryResults.text.getLength(); + } + + /* + * DATE. + */ + + // This class is for internal use. 
+ private static class BinarySortableReadDateResults extends ReadDateResults { + + public BinarySortableReadDateResults() { + super(); + } + + public DateWritable getDateWritable() { + return dateWritable; + } + } + + // Reading a DATE field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DATE field. + @Override + public ReadDateResults createReadDateResults() { + return new BinarySortableReadDateResults(); + } + + @Override + public void readDate(ReadDateResults readDateResults) throws IOException { + BinarySortableReadDateResults binarySortableReadDateResults = (BinarySortableReadDateResults) readDateResults; + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + DateWritable dateWritable = binarySortableReadDateResults.getDateWritable(); + dateWritable.set(v); + } + + /* + * TIMESTAMP. + */ + + // This class is for internal use. + private static class BinarySortableReadTimestampResults extends ReadTimestampResults { + + private byte[] timestampBytes; + + public BinarySortableReadTimestampResults() { + super(); + timestampBytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH]; + } + + public TimestampWritable getTimestampWritable() { + return timestampWritable; + } + } + + // Reading a TIMESTAMP field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different TIMESTAMP field. + @Override + public ReadTimestampResults createReadTimestampResults() { + return new BinarySortableReadTimestampResults(); + } + + @Override + public void readTimestamp(ReadTimestampResults readTimestampResults) throws IOException { + BinarySortableReadTimestampResults binarySortableReadTimestampResults = (BinarySortableReadTimestampResults) readTimestampResults; + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + byte[] timestampBytes = binarySortableReadTimestampResults.timestampBytes; + for (int i = 0; i < timestampBytes.length; i++) { + timestampBytes[i] = inputByteBuffer.read(invert); + } + TimestampWritable timestampWritable = binarySortableReadTimestampResults.getTimestampWritable(); + timestampWritable.setBinarySortable(timestampBytes, 0); + } + + /* + * INTERVAL_YEAR_MONTH. + */ + + // This class is for internal use. + private static class BinarySortableReadIntervalYearMonthResults extends ReadIntervalYearMonthResults { + + public BinarySortableReadIntervalYearMonthResults() { + super(); + } + + public HiveIntervalYearMonthWritable getHiveIntervalYearMonthWritable() { + return hiveIntervalYearMonthWritable; + } + } + + // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_YEAR_MONTH field. 
+ @Override + public ReadIntervalYearMonthResults createReadIntervalYearMonthResults() { + return new BinarySortableReadIntervalYearMonthResults(); + } + + @Override + public void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResults) + throws IOException { + BinarySortableReadIntervalYearMonthResults binarySortableReadIntervalYearMonthResults = + (BinarySortableReadIntervalYearMonthResults) readIntervalYearMonthResults; + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable = + binarySortableReadIntervalYearMonthResults.getHiveIntervalYearMonthWritable(); + hiveIntervalYearMonthWritable.set(v); + } + + /* + * INTERVAL_DAY_TIME. + */ + + // This class is for internal use. + private static class BinarySortableReadIntervalDayTimeResults extends ReadIntervalDayTimeResults { + + public BinarySortableReadIntervalDayTimeResults() { + super(); + } + + public HiveIntervalDayTimeWritable getHiveIntervalDayTimeWritable() { + return hiveIntervalDayTimeWritable; + } + } + + // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_DAY_TIME field. + @Override + public ReadIntervalDayTimeResults createReadIntervalDayTimeResults() { + return new BinarySortableReadIntervalDayTimeResults(); + } + + @Override + public void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResults) + throws IOException { + BinarySortableReadIntervalDayTimeResults binarySortableReadIntervalDayTimeResults = + (BinarySortableReadIntervalDayTimeResults) readIntervalDayTimeResults; + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + long totalSecs = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 7; i++) { + totalSecs = (totalSecs << 8) + (inputByteBuffer.read(invert) & 0xff); + } + int nanos = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + nanos = (nanos << 8) + (inputByteBuffer.read(invert) & 0xff); + } + HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable = + binarySortableReadIntervalDayTimeResults.getHiveIntervalDayTimeWritable(); + hiveIntervalDayTimeWritable.set(totalSecs, nanos); + } + + /* + * DECIMAL. + */ + + // This class is for internal use. + private static class BinarySortableReadDecimalResults extends ReadDecimalResults { + + public HiveDecimal hiveDecimal; + + public BinarySortableReadDecimalResults() { + super(); + } + + @Override + public void init(DecimalTypeInfo decimalTypeInfo) { + super.init(decimalTypeInfo); + } + + @Override + public HiveDecimal getHiveDecimal() { + return hiveDecimal; + } + } + + // Reading a DECIMAL field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DECIMAL field. 
+ @Override + public ReadDecimalResults createReadDecimalResults() { + return new BinarySortableReadDecimalResults(); + } + + @Override + public void readHiveDecimal(ReadDecimalResults readDecimalResults) throws IOException { + BinarySortableReadDecimalResults binarySortableReadDecimalResults = + (BinarySortableReadDecimalResults) readDecimalResults; + + if (!binarySortableReadDecimalResults.isInit()) { + binarySortableReadDecimalResults.init(saveDecimalTypeInfo); + } + + binarySortableReadDecimalResults.hiveDecimal = saveDecimal; + + saveDecimal = null; + saveDecimalTypeInfo = null; + } + + /** + * We read the whole HiveDecimal value and then enforce precision and scale, which may + * make it a NULL. + * @return Returns true if this HiveDecimal enforced to a NULL. + * @throws IOException + */ + private boolean earlyReadHiveDecimal() throws IOException { + + // Since enforcing precision and scale can cause a HiveDecimal to become NULL, + // we must read it, enforce it here, and either return NULL or buffer the result. + + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int b = inputByteBuffer.read(invert) - 1; + assert (b == 1 || b == -1 || b == 0); + boolean positive = b != -1; + + int factor = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + factor = (factor << 8) + (inputByteBuffer.read(invert) & 0xff); + } + + if (!positive) { + factor = -factor; + } + + int start = inputByteBuffer.tell(); + int length = 0; + + do { + b = inputByteBuffer.read(positive ? invert : !invert); + assert(b != 1); + + if (b == 0) { + // end of digits + break; + } + + length++; + } while (true); + + if(tempDecimalBuffer == null || tempDecimalBuffer.length < length) { + tempDecimalBuffer = new byte[length]; + } + + inputByteBuffer.seek(start); + for (int i = 0; i < length; ++i) { + tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert); + } + + // read the null byte again + inputByteBuffer.read(positive ? invert : !invert); + + String digits = new String(tempDecimalBuffer, 0, length, BinarySortableSerDe.decimalCharSet); + BigInteger bi = new BigInteger(digits); + HiveDecimal bd = HiveDecimal.create(bi).scaleByPowerOfTen(factor-length); + + if (!positive) { + bd = bd.negate(); + } + + // We have a decimal. After we enforce precision and scale, will it become a NULL? + + if (tempHiveDecimalWritable == null) { + tempHiveDecimalWritable = new HiveDecimalWritable(); + } + tempHiveDecimalWritable.set(bd); + + saveDecimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfos[fieldIndex]; + + int precision = saveDecimalTypeInfo.getPrecision(); + int scale = saveDecimalTypeInfo.getScale(); + + saveDecimal = tempHiveDecimalWritable.getHiveDecimal(precision, scale); + + // Now return whether it is NULL or NOT NULL. + return (saveDecimal == null); + } +} \ No newline at end of file Index: serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java (working copy) @@ -0,0 +1,448 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.binarysortable.fast; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Arrays; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.InputByteBuffer; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalDayTime; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalYearMonth; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hive.common.util.DateUtils; + +/* + * Directly serialize, field-by-field, the BinarySortable format. + * + * This is an alternative way to serialize than what is provided by BinarySortableSerDe. + */ +public class BinarySortableSerializeWrite implements SerializeWrite { + public static final Log LOG = LogFactory.getLog(BinarySortableSerializeWrite.class.getName()); + + private Output output; + + // The sort order (ascending/descending) for each field. Set to true when descending (invert). + private boolean[] columnSortOrderIsDesc; + + // Which field we are on. We start with -1 to be consistent in style with + // BinarySortableDeserializeRead. + private int index; + + private int fieldCount; + + private TimestampWritable tempTimestampWritable; + + public BinarySortableSerializeWrite(boolean[] columnSortOrderIsDesc) { + this(); + fieldCount = columnSortOrderIsDesc.length; + this.columnSortOrderIsDesc = columnSortOrderIsDesc; + } + + /* + * Use this constructor when only ascending sort order is used. + */ + public BinarySortableSerializeWrite(int fieldCount) { + this(); + this.fieldCount = fieldCount; + columnSortOrderIsDesc = new boolean[fieldCount]; + Arrays.fill(columnSortOrderIsDesc, false); + } + + // Not public since we must have the field count or column sort order information. + private BinarySortableSerializeWrite() { + tempTimestampWritable = new TimestampWritable(); + } + + /* + * Set the buffer that will receive the serialized data. + */ + @Override + public void set(Output output) { + this.output = output; + this.output.reset(); + index = -1; + } + + /* + * Reset the previously supplied buffer that will receive the serialized data. 
+ */ + @Override + public void reset() { + output.reset(); + index = -1; + } + + /* + * Write a NULL field. + */ + @Override + public void writeNull() throws IOException { + BinarySortableSerDe.writeByte(output, (byte) 0, columnSortOrderIsDesc[++index]); + } + + /* + * BOOLEAN. + */ + @Override + public void writeBoolean(boolean v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.writeByte(output, (byte) (v ? 2 : 1), invert); + } + + /* + * BYTE. + */ + @Override + public void writeByte(byte v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.writeByte(output, (byte) (v ^ 0x80), invert); + } + + /* + * SHORT. + */ + @Override + public void writeShort(short v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.writeByte(output, (byte) ((v >> 8) ^ 0x80), invert); + BinarySortableSerDe.writeByte(output, (byte) v, invert); + } + + /* + * INT. + */ + @Override + public void writeInt(int v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeInt(output, v, invert); + } + + /* + * LONG. + */ + @Override + public void writeLong(long v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.writeByte(output, (byte) ((v >> 56) ^ 0x80), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 48), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 40), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 32), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 24), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 16), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 8), invert); + BinarySortableSerDe.writeByte(output, (byte) v, invert); + + } + + /* + * FLOAT. + */ + @Override + public void writeFloat(float vf) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + int v = Float.floatToIntBits(vf); + if ((v & (1 << 31)) != 0) { + // negative number, flip all bits + v = ~v; + } else { + // positive number, flip the first bit + v = v ^ (1 << 31); + } + BinarySortableSerDe.writeByte(output, (byte) (v >> 24), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 16), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 8), invert); + BinarySortableSerDe.writeByte(output, (byte) v, invert); + } + + /* + * DOUBLE. + */ + @Override + public void writeDouble(double vd) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. 
+ BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + long v = Double.doubleToLongBits(vd); + if ((v & (1L << 63)) != 0) { + // negative number, flip all bits + v = ~v; + } else { + // positive number, flip the first bit + v = v ^ (1L << 63); + } + BinarySortableSerDe.writeByte(output, (byte) (v >> 56), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 48), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 40), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 32), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 24), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 16), invert); + BinarySortableSerDe.writeByte(output, (byte) (v >> 8), invert); + BinarySortableSerDe.writeByte(output, (byte) v, invert); + } + + /* + * STRING. + * + * Can be used to write CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. + */ + @Override + public void writeString(byte[] v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeBytes(output, v, 0, v.length, invert); + } + + @Override + public void writeString(byte[] v, int start, int length) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeBytes(output, v, start, length, invert); + } + + /* + * CHAR. + */ + @Override + public void writeHiveChar(HiveChar hiveChar) throws IOException { + String string = hiveChar.getStrippedValue(); + byte[] bytes = string.getBytes(); + writeString(bytes); + } + + /* + * VARCHAR. + */ + @Override + public void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException { + String string = hiveVarchar.getValue(); + byte[] bytes = string.getBytes(); + writeString(bytes); + } + + /* + * BINARY. + */ + @Override + public void writeBinary(byte[] v) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeBytes(output, v, 0, v.length, invert); + } + + @Override + public void writeBinary(byte[] v, int start, int length) { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeBytes(output, v, start, length, invert); + } + + /* + * DATE. + */ + @Override + public void writeDate(Date date) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeInt(output, DateWritable.dateToDays(date), invert); + } + + // We provide a faster way to write a date without a Date object. + @Override + public void writeDate(int dateAsDays) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeInt(output, dateAsDays, invert); + } + + /* + * TIMESTAMP. + */ + @Override + public void writeTimestamp(Timestamp vt) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. 
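Every write method above first emits the not-null marker byte and threads the column's invert flag into BinarySortableSerDe.writeByte, which XORs descending columns with 0xff. Complementing each byte reverses unsigned byte-wise order, which is the whole trick behind per-column sort direction. A standalone check (illustrative names, not from the patch):

public class InvertOrder {
  static byte[] invert(byte[] in) {
    byte[] out = new byte[in.length];
    for (int i = 0; i < in.length; i++) {
      out[i] = (byte) (0xff ^ in[i]);  // same transform writeByte applies when invert == true
    }
    return out;
  }

  // Unsigned lexicographic comparison, i.e. what a raw byte-wise sort does.
  static int compare(byte[] a, byte[] b) {
    int n = Math.min(a.length, b.length);
    for (int i = 0; i < n; i++) {
      int d = (a[i] & 0xff) - (b[i] & 0xff);
      if (d != 0) {
        return d;
      }
    }
    return a.length - b.length;
  }

  public static void main(String[] args) {
    byte[] lo = { 0x01, 0x20 };
    byte[] hi = { 0x01, 0x7f };
    System.out.println(compare(lo, hi) < 0);                  // true: ascending order
    System.out.println(compare(invert(lo), invert(hi)) > 0);  // true: order reversed
  }
}

For variable-length fields the serde additionally escapes 0x00/0x01 and appends a terminator byte (see serializeBytes earlier in the patch), so that prefixes still compare correctly; byte inversion alone only handles fixed-length comparisons.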
+ BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + tempTimestampWritable.set(vt); + byte[] data = tempTimestampWritable.getBinarySortable(); + for (int i = 0; i < data.length; i++) { + BinarySortableSerDe.writeByte(output, data[i], invert); + } + } + + /* + * INTERVAL_YEAR_MONTH. + */ + @Override + public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + int totalMonths = viyt.getTotalMonths(); + BinarySortableSerDe.serializeInt(output, totalMonths, invert); + } + + @Override + public void writeHiveIntervalYearMonth(int totalMonths) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + BinarySortableSerDe.serializeInt(output, totalMonths, invert); + } + + /* + * INTERVAL_DAY_TIME. + */ + @Override + public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + long totalSecs = vidt.getTotalSeconds(); + int nanos = vidt.getNanos(); + BinarySortableSerDe.serializeLong(output, totalSecs, invert); + BinarySortableSerDe.serializeInt(output, nanos, invert); + } + + @Override + public void writeHiveIntervalDayTime(long totalNanos) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + long totalSecs = DateUtils.getIntervalDayTimeTotalSecondsFromTotalNanos(totalNanos); + int nanos = DateUtils.getIntervalDayTimeNanosFromTotalNanos(totalNanos); + BinarySortableSerDe.serializeLong(output, totalSecs, invert); + BinarySortableSerDe.serializeInt(output, nanos, invert); + } + + /* + * DECIMAL. + */ + @Override + public void writeHiveDecimal(HiveDecimal dec) throws IOException { + final boolean invert = columnSortOrderIsDesc[++index]; + + // This field is not a null. + BinarySortableSerDe.writeByte(output, (byte) 1, invert); + + // decimals are encoded in three pieces: + // sign: 1, 2 or 3 for smaller, equal or larger than 0 respectively + // factor: Number that indicates the amount of digits you have to move + // the decimal point left or right until the resulting number is smaller + // than zero but has something other than 0 as the first digit. + // digits: which is a string of all the digits in the decimal. If the number + // is negative the binary string will be inverted to get the correct ordering. + // Example: 0.00123 + // Sign is 3 (bigger than 0) + // Factor is -2 (move decimal point 2 positions right) + // Digits are: 123 + + // get the sign of the big decimal + int sign = dec.compareTo(HiveDecimal.ZERO); + + // we'll encode the absolute value (sign is separate) + dec = dec.abs(); + + // get the scale factor to turn big decimal into a decimal < 1 + int factor = dec.precision() - dec.scale(); + factor = sign == 1 ? 
factor : -factor; + + // convert the absolute big decimal to string + dec.scaleByPowerOfTen(Math.abs(dec.scale())); + String digits = dec.unscaledValue().toString(); + + // finally write out the pieces (sign, scale, digits) + BinarySortableSerDe.writeByte(output, (byte) ( sign + 1), invert); + BinarySortableSerDe.writeByte(output, (byte) ((factor >> 24) ^ 0x80), invert); + BinarySortableSerDe.writeByte(output, (byte) ( factor >> 16), invert); + BinarySortableSerDe.writeByte(output, (byte) ( factor >> 8), invert); + BinarySortableSerDe.writeByte(output, (byte) factor, invert); + BinarySortableSerDe.serializeBytes(output, digits.getBytes(BinarySortableSerDe.decimalCharSet), + digits.length(), sign == -1 ? !invert : invert); + } +} \ No newline at end of file Index: serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java (working copy) @@ -0,0 +1,387 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.fast; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; + +/* + * Directly deserialize with the caller reading field-by-field a serialization format. + * + * The caller is responsible for calling the read method for the right type of each field + * (after calling readCheckNull). + * + * Reading some fields require a results object to receive value information. A separate + * results object is created by the caller at initialization per different field even for the same + * type. + * + * Some type values are by reference to either bytes in the deserialization buffer or to + * other type specific buffers. 
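// Illustrative sketch, not part of the patch: the (sign, factor, digits) decomposition
// used by writeHiveDecimal() earlier in this patch, with java.math.BigDecimal standing
// in for HiveDecimal. The factor is the power of ten such that the absolute value
// equals 0.<digits> * 10^factor; for negative values the serializer then negates the
// factor so that byte ordering still matches numeric ordering.
import java.math.BigDecimal;

public class DecimalSortableSketch {
  static void decompose(String s) {
    BigDecimal d = new BigDecimal(s);
    int sign = d.signum();
    BigDecimal abs = d.abs().stripTrailingZeros();   // HiveDecimal normalizes trailing zeros
    int factor = abs.precision() - abs.scale();
    String digits = abs.unscaledValue().toString();
    System.out.println(s + " -> sign=" + sign + ", factor=" + factor + ", digits=" + digits);
  }

  public static void main(String[] args) {
    decompose("0.00123");   // sign=1,  factor=-2, digits=123   (0.123 * 10^-2)
    decompose("123.45");    // sign=1,  factor=3,  digits=12345 (0.12345 * 10^3)
    decompose("-7");        // sign=-1, factor=1,  digits=7
  }
}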
So, those references are only valid until the next time set is + * called. + */ +public interface DeserializeRead { + + /* + * The primitive type information for all fields. + */ + PrimitiveTypeInfo[] primitiveTypeInfos(); + + /* + * Set the range of bytes to be deserialized. + */ + void set(byte[] bytes, int offset, int length); + + /* + * Reads the NULL information for a field. + * + * @return Return true when the field is NULL; reading is positioned to the next field. + * Otherwise, false when the field is NOT NULL; reading is positioned to the field data. + */ + boolean readCheckNull() throws IOException; + + /* + * Call this method after all fields have been read to check for extra fields. + */ + void extraFieldsCheck(); + + /* + * Read integrity warning flags. + */ + boolean readBeyondConfiguredFieldsWarned(); + boolean readBeyondBufferRangeWarned(); + boolean bufferRangeHasExtraDataWarned(); + + /* + * BOOLEAN. + */ + boolean readBoolean() throws IOException; + + /* + * BYTE. + */ + byte readByte() throws IOException; + + /* + * SHORT. + */ + short readShort() throws IOException; + + /* + * INT. + */ + int readInt() throws IOException; + + /* + * LONG. + */ + long readLong() throws IOException; + + /* + * FLOAT. + */ + float readFloat() throws IOException; + + /* + * DOUBLE. + */ + double readDouble() throws IOException; + + /* + * This class is the base abstract read bytes results for STRING, CHAR, VARCHAR, and BINARY. + */ + public abstract class ReadBytesResults { + + public byte[] bytes; + public int start; + public int length; + + public ReadBytesResults() { + bytes = null; + start = 0; + length = 0; + } + } + + /* + * STRING. + * + * Can be used to read CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadStringResults extends ReadBytesResults { + + public ReadStringResults() { + super(); + } + } + + // Reading a STRING field require a results object to receive value information. A separate + // results object is created at initialization per different bytes field. + ReadStringResults createReadStringResults(); + + void readString(ReadStringResults readStringResults) throws IOException; + + /* + * CHAR. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadHiveCharResults extends ReadBytesResults { + + private CharTypeInfo charTypeInfo; + private int maxLength; + + protected HiveCharWritable hiveCharWritable; + + public ReadHiveCharResults() { + super(); + } + + public void init(CharTypeInfo charTypeInfo) { + this.charTypeInfo = charTypeInfo; + this.maxLength = charTypeInfo.getLength(); + hiveCharWritable = new HiveCharWritable(); + } + + public boolean isInit() { + return (charTypeInfo != null); + } + + public int getMaxLength() { + return maxLength; + } + + public HiveChar getHiveChar() { + return hiveCharWritable.getHiveChar(); + } + } + + // Reading a CHAR field require a results object to receive value information. A separate + // results object is created at initialization per different CHAR field. + ReadHiveCharResults createReadHiveCharResults(); + + void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException; + + /* + * VARCHAR. + */ + + // This class is for abstract since each format may need its own specialization. 
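// Illustrative sketch, not part of the patch: the calling pattern this interface
// expects. The caller positions the reader with set(), then for each field calls
// readCheckNull() followed by the read method matching that field's declared type,
// reusing results objects created once per column. The concrete DeserializeRead
// instance and the (BIGINT, STRING) column layout are assumptions of this example.
import java.io.IOException;
import org.apache.hadoop.hive.serde2.fast.DeserializeRead;

public class DeserializeReadUsageSketch {

  static void readRow(DeserializeRead reader, byte[] rowBytes,
      DeserializeRead.ReadStringResults stringResults) throws IOException {

    reader.set(rowBytes, 0, rowBytes.length);

    // Field 0: BIGINT.
    if (reader.readCheckNull()) {
      System.out.println("c0 = NULL");
    } else {
      System.out.println("c0 = " + reader.readLong());
    }

    // Field 1: STRING.
    if (reader.readCheckNull()) {
      System.out.println("c1 = NULL");
    } else {
      reader.readString(stringResults);
      System.out.println("c1 = "
          + new String(stringResults.bytes, stringResults.start, stringResults.length));
    }

    // Optionally verify nothing was left unread in the buffer range.
    reader.extraFieldsCheck();
  }
}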
+ public abstract class ReadHiveVarcharResults extends ReadBytesResults { + + private VarcharTypeInfo varcharTypeInfo; + private int maxLength; + + protected HiveVarcharWritable hiveVarcharWritable; + + public ReadHiveVarcharResults() { + super(); + } + + public void init(VarcharTypeInfo varcharTypeInfo) { + this.varcharTypeInfo = varcharTypeInfo; + this.maxLength = varcharTypeInfo.getLength(); + hiveVarcharWritable = new HiveVarcharWritable(); + } + + public boolean isInit() { + return (varcharTypeInfo != null); + } + + public int getMaxLength() { + return maxLength; + } + + public HiveVarchar getHiveVarchar() { + return hiveVarcharWritable.getHiveVarchar(); + } + } + + // Reading a VARCHAR field require a results object to receive value information. A separate + // results object is created at initialization per different VARCHAR field. + ReadHiveVarcharResults createReadHiveVarcharResults(); + + void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException; + + /* + * BINARY. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadBinaryResults extends ReadBytesResults { + + public ReadBinaryResults() { + super(); + } + } + + // Reading a BINARY field require a results object to receive value information. A separate + // results object is created at initialization per different bytes field. + ReadBinaryResults createReadBinaryResults(); + + void readBinary(ReadBinaryResults readBinaryResults) throws IOException; + + /* + * DATE. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadDateResults { + + protected DateWritable dateWritable; + + public ReadDateResults() { + dateWritable = new DateWritable(); + } + + public Date getDate() { + return dateWritable.get(); + } + + public int getDays() { + return dateWritable.getDays(); + } + } + + // Reading a DATE field require a results object to receive value information. A separate + // results object is created at initialization per different DATE field. + ReadDateResults createReadDateResults(); + + void readDate(ReadDateResults readDateResults) throws IOException; + + /* + * TIMESTAMP. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadTimestampResults { + + protected TimestampWritable timestampWritable; + + public ReadTimestampResults() { + timestampWritable = new TimestampWritable(); + } + + public Timestamp getTimestamp() { + return timestampWritable.getTimestamp(); + } + } + + // Reading a TIMESTAMP field require a results object to receive value information. A separate + // results object is created at initialization per different TIMESTAMP field. + ReadTimestampResults createReadTimestampResults(); + + void readTimestamp(ReadTimestampResults readTimestampResult) throws IOException; + + /* + * INTERVAL_YEAR_MONTH. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadIntervalYearMonthResults { + + protected HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable; + + public ReadIntervalYearMonthResults() { + hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); + } + + public HiveIntervalYearMonth getHiveIntervalYearMonth() { + return hiveIntervalYearMonthWritable.getHiveIntervalYearMonth(); + } + } + + // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. 
+ // A separate results object is created at initialization per different INTERVAL_YEAR_MONTH field. + ReadIntervalYearMonthResults createReadIntervalYearMonthResults(); + + void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResult) throws IOException; + + /* + * INTERVAL_DAY_TIME. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadIntervalDayTimeResults { + + protected HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable; + + public ReadIntervalDayTimeResults() { + hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); + } + + public HiveIntervalDayTime getHiveIntervalDayTime() { + return hiveIntervalDayTimeWritable.getHiveIntervalDayTime(); + } + } + + // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. + // A separate results object is created at initialization per different INTERVAL_DAY_TIME field. + ReadIntervalDayTimeResults createReadIntervalDayTimeResults(); + + void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResult) throws IOException; + + /* + * DECIMAL. + */ + + // This class is for abstract since each format may need its own specialization. + public abstract class ReadDecimalResults { + + protected DecimalTypeInfo decimalTypeInfo; + + public ReadDecimalResults() { + } + + public void init(DecimalTypeInfo decimalTypeInfo) { + this.decimalTypeInfo = decimalTypeInfo; + } + + public boolean isInit() { + return (decimalTypeInfo != null); + } + + public abstract HiveDecimal getHiveDecimal(); + } + + // Reading a DECIMAL field require a results object to receive value information. A separate + // results object is created at initialization per different DECIMAL field. + ReadDecimalResults createReadDecimalResults(); + + void readHiveDecimal(ReadDecimalResults readDecimalResults) throws IOException; +} \ No newline at end of file Index: serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java (working copy) @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2.fast; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.ByteStream.Output; + +/* + * Directly serialize with the caller writing field-by-field a serialization format. + * + * The caller is responsible for calling the write method for the right type of each field + * (or calling writeNull if the field is a NULL). + * + */ +public interface SerializeWrite { + + /* + * Set the buffer that will receive the serialized data. + */ + void set(Output output); + + /* + * Reset the previously supplied buffer that will receive the serialized data. + */ + void reset(); + + /* + * Write a NULL field. + */ + void writeNull() throws IOException; + + /* + * BOOLEAN. + */ + void writeBoolean(boolean v) throws IOException; + + /* + * BYTE. + */ + void writeByte(byte v) throws IOException; + + /* + * SHORT. + */ + void writeShort(short v) throws IOException; + + /* + * INT. + */ + void writeInt(int v) throws IOException; + + /* + * LONG. + */ + void writeLong(long v) throws IOException; + + /* + * FLOAT. + */ + void writeFloat(float vf) throws IOException; + + /* + * DOUBLE. + */ + void writeDouble(double vd) throws IOException; + + /* + * STRING. + * + * Can be used to write CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. + */ + void writeString(byte[] v) throws IOException; + void writeString(byte[] v, int start, int length) throws IOException; + + /* + * CHAR. + */ + void writeHiveChar(HiveChar hiveChar) throws IOException; + + /* + * VARCHAR. + */ + void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException; + + /* + * BINARY. + */ + void writeBinary(byte[] v) throws IOException; + void writeBinary(byte[] v, int start, int length) throws IOException; + + /* + * DATE. + */ + void writeDate(Date date) throws IOException; + + // We provide a faster way to write a date without a Date object. + void writeDate(int dateAsDays) throws IOException; + + /* + * TIMESTAMP. + */ + void writeTimestamp(Timestamp vt) throws IOException; + + /* + * INTERVAL_YEAR_MONTH. + */ + void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException; + + // We provide a faster way to write a hive interval year month without a HiveIntervalYearMonth object. + void writeHiveIntervalYearMonth(int totalMonths) throws IOException; + + /* + * INTERVAL_DAY_TIME. + */ + void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException; + + // We provide a faster way to write a hive interval day time without a HiveIntervalDayTime object. + void writeHiveIntervalDayTime(long totalNanos) throws IOException; + + /* + * DECIMAL. 
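// Illustrative sketch, not part of the patch: the calling pattern for SerializeWrite.
// Only methods declared in the interface are used; the concrete writer
// (BinarySortableSerializeWrite or LazySimpleSerializeWrite, both added by this patch)
// is assumed to be constructed elsewhere with the matching column information.
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.fast.SerializeWrite;

public class SerializeWriteUsageSketch {

  // Serialize one (BIGINT, STRING) row into the supplied Output buffer.
  static void writeRow(SerializeWrite writer, Output out, Long c0, String c1)
      throws IOException {

    writer.set(out);   // bind the output buffer (the LazySimple implementation resets it here)

    if (c0 == null) {
      writer.writeNull();
    } else {
      writer.writeLong(c0);
    }

    if (c1 == null) {
      writer.writeNull();
    } else {
      byte[] utf8 = c1.getBytes(StandardCharsets.UTF_8);
      writer.writeString(utf8, 0, utf8.length);
    }
    // out now holds the serialized row (ByteStream.Output exposes getData()/getLength()).
  }
}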
+ */ + void writeHiveDecimal(HiveDecimal dec) throws IOException; +} Index: serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java (working copy) @@ -25,9 +25,6 @@ import java.util.TimeZone; import java.util.concurrent.TimeUnit; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.ByteStream.RandomAccessOutput; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; @@ -44,8 +41,6 @@ * */ public class DateWritable implements WritableComparable { - private static final Log LOG = LogFactory.getLog(DateWritable.class); - private static final long MILLIS_PER_DAY = TimeUnit.DAYS.toMillis(1); // Local time zone. @@ -136,7 +131,13 @@ public static int millisToDays(long millisLocal) { long millisUtc = millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal); - return (int)(millisUtc / MILLIS_PER_DAY); + int days; + if (millisUtc >= 0L) { + days = (int) (millisUtc / MILLIS_PER_DAY); + } else { + days = (int) ((millisUtc - 86399999) / MILLIS_PER_DAY); + } + return days; } public static int dateToDays(Date d) { Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java (working copy) @@ -54,7 +54,7 @@ } // todo this should be configured in serde - private byte[] decodeIfNeeded(byte[] recv) { + public static byte[] decodeIfNeeded(byte[] recv) { boolean arrayByteBase64 = Base64.isArrayByteBase64(recv); if (DEBUG_LOG_ENABLED && arrayByteBase64) { LOG.debug("Data only contains Base64 alphabets only so try to decode the data."); Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java (working copy) @@ -161,7 +161,7 @@ } } } else { - out.write(bytes, 0, len); + out.write(bytes, start, len); } } Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java (working copy) @@ -0,0 +1,1062 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
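// Illustrative sketch, not part of the patch: what the DateWritable.millisToDays()
// change above fixes. Plain integer division truncates toward zero, so -1 ms (one
// millisecond before 1970-01-01 UTC) would map to day 0 instead of day -1. The new
// branch for negative values behaves like a floor division.
public class MillisToDaysSketch {
  private static final long MILLIS_PER_DAY = 86400000L;

  static int patchedMillisToDays(long millisUtc) {
    return millisUtc >= 0L
        ? (int) (millisUtc / MILLIS_PER_DAY)
        : (int) ((millisUtc - 86399999L) / MILLIS_PER_DAY);
  }

  public static void main(String[] args) {
    long[] samples = {-86400001L, -86400000L, -1L, 0L, 86399999L, 86400000L};
    for (long millis : samples) {
      int truncated = (int) (millis / MILLIS_PER_DAY);            // old behavior
      int patched = patchedMillisToDays(millis);                  // new behavior
      int floored = (int) Math.floorDiv(millis, MILLIS_PER_DAY);  // reference
      System.out.println(millis + " ms -> old=" + truncated
          + " new=" + patched + " floorDiv=" + floored);
    }
  }
}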
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazy.fast; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.nio.charset.CharacterCodingException; +import java.sql.Date; +import java.sql.Timestamp; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadIntervalDayTimeResults; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadIntervalYearMonthResults; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyBinary; +import org.apache.hadoop.hive.serde2.lazy.LazyByte; +import org.apache.hadoop.hive.serde2.lazy.LazyInteger; +import org.apache.hadoop.hive.serde2.lazy.LazyLong; +import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazyShort; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.TimestampParser; + +/* + * Directly deserialize with the caller reading field-by-field the LazySimple (text) + * serialization format. + * + * The caller is responsible for calling the read method for the right type of each field + * (after calling readCheckNull). + * + * Reading some fields require a results object to receive value information. A separate + * results object is created by the caller at initialization per different field even for the same + * type. + * + * Some type values are by reference to either bytes in the deserialization buffer or to + * other type specific buffers. So, those references are only valid until the next time set is + * called. 
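// Illustrative sketch, not part of the patch: what the "valid until the next set()"
// caveat above means in practice. A STRING read may point directly into the row
// buffer, so a caller that wants to keep a value across rows must copy it before
// reusing the reader. The reader and results objects are assumed to be set up as
// described by the DeserializeRead interface.
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.fast.DeserializeRead;

public class CopyBeforeReuseSketch {
  static byte[] keepString(DeserializeRead reader,
      DeserializeRead.ReadStringResults results) throws IOException {
    reader.readString(results);
    // Copy now; results.bytes may alias the buffer passed to the last set() call.
    return Arrays.copyOfRange(results.bytes, results.start, results.start + results.length);
  }
}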
+ */ +public class LazySimpleDeserializeRead implements DeserializeRead { + public static final Log LOG = LogFactory.getLog(LazySimpleDeserializeRead.class.getName()); + + private PrimitiveTypeInfo[] primitiveTypeInfos; + + private LazySerDeParameters lazyParams; + + private byte separator; + private boolean lastColumnTakesRest; + private boolean isEscaped; + private byte escapeChar; + private byte[] nullSequenceBytes; + private boolean isExtendedBooleanLiteral; + + private byte[] bytes; + private int start; + private int offset; + private int end; + private int fieldCount; + private int fieldIndex; + private int fieldStart; + private int fieldLength; + + private boolean saveBool; + private byte saveByte; + private short saveShort; + private int saveInt; + private long saveLong; + private float saveFloat; + private double saveDouble; + private byte[] saveBytes; + private int saveBytesStart; + private int saveBytesLength; + private Date saveDate; + private Timestamp saveTimestamp; + private HiveIntervalYearMonth saveIntervalYearMonth; + private HiveIntervalDayTime saveIntervalDayTime; + private HiveDecimal saveDecimal; + private DecimalTypeInfo saveDecimalTypeInfo; + + private Text tempText; + private TimestampParser timestampParser; + + private boolean readBeyondConfiguredFieldsWarned; + private boolean readBeyondBufferRangeWarned; + private boolean bufferRangeHasExtraDataWarned; + + public LazySimpleDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos, + byte separator, LazySerDeParameters lazyParams) { + + this.primitiveTypeInfos = primitiveTypeInfos; + + this.separator = separator; + this.lazyParams = lazyParams; + + lastColumnTakesRest = lazyParams.isLastColumnTakesRest(); + isEscaped = lazyParams.isEscaped(); + escapeChar = lazyParams.getEscapeChar(); + nullSequenceBytes = lazyParams.getNullSequence().getBytes(); + isExtendedBooleanLiteral = lazyParams.isExtendedBooleanLiteral(); + + fieldCount = primitiveTypeInfos.length; + tempText = new Text(); + readBeyondConfiguredFieldsWarned = false; + readBeyondBufferRangeWarned = false; + bufferRangeHasExtraDataWarned = false; + } + + // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. + private LazySimpleDeserializeRead() { + } + + /* + * The primitive type information for all fields. + */ + public PrimitiveTypeInfo[] primitiveTypeInfos() { + return primitiveTypeInfos; + } + + /* + * Set the range of bytes to be deserialized. + */ + @Override + public void set(byte[] bytes, int offset, int length) { + this.bytes = bytes; + this.offset = offset; + start = offset; + end = offset + length; + fieldIndex = -1; + } + + /* + * Reads the NULL information for a field. + * + * @return Returns true when the field is NULL; reading is positioned to the next field. + * Otherwise, false when the field is NOT NULL; reading is positioned to the field data. + */ + @Override + public boolean readCheckNull() { + if (++fieldIndex >= fieldCount) { + // Reading beyond the specified field count produces NULL. + if (!readBeyondConfiguredFieldsWarned) { + // Warn only once. + LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but " + + " reading more (NULLs returned). Ignoring similar problems."); + readBeyondConfiguredFieldsWarned = true; + } + return true; + } + if (offset > end) { + // We must allow for an empty field at the end, so no strict >= checking. + if (!readBeyondBufferRangeWarned) { + // Warn only once. + int length = end - start; + LOG.info("Reading beyond buffer range! 
Buffer range " + start + + " for length " + length + " but reading more (NULLs returned)." + + " Ignoring similar problems."); + readBeyondBufferRangeWarned = true; + } + + // char[] charsBuffer = new char[end - start]; + // for (int c = 0; c < charsBuffer.length; c++) { + // charsBuffer[c] = (char) (bytes[start + c] & 0xFF); + // } + + return true; + } + + fieldStart = offset; + while (true) { + if (offset >= end) { + fieldLength = offset - fieldStart; + break; + } + if (bytes[offset] == separator) { + fieldLength = (offset++ - fieldStart); + break; + } + if (isEscaped && bytes[offset] == escapeChar + && offset + 1 < end) { + // Ignore the char after escape char. + offset += 2; + } else { + offset++; + } + } + + char[] charField = new char[fieldLength]; + for (int c = 0; c < charField.length; c++) { + charField[c] = (char) (bytes[fieldStart + c] & 0xFF); + } + + // Is the field the configured string representing NULL? + if (nullSequenceBytes != null) { + if (fieldLength == nullSequenceBytes.length) { + int i = 0; + while (true) { + if (bytes[fieldStart + i] != nullSequenceBytes[i]) { + break; + } + i++; + if (i >= fieldLength) { + return true; + } + } + } + } + + switch (primitiveTypeInfos[fieldIndex].getPrimitiveCategory()) { + case BOOLEAN: + { + int i = fieldStart; + if (fieldLength == 4) { + if ((bytes[i] == 'T' || bytes[i] == 't') && + (bytes[i + 1] == 'R' || bytes[i + 1] == 'r') && + (bytes[i + 2] == 'U' || bytes[i + 1] == 'u') && + (bytes[i + 3] == 'E' || bytes[i + 3] == 'e')) { + saveBool = true; + } else { + // No boolean value match for 5 char field. + return true; + } + } else if (fieldLength == 5) { + if ((bytes[i] == 'F' || bytes[i] == 'f') && + (bytes[i + 1] == 'A' || bytes[i + 1] == 'a') && + (bytes[i + 2] == 'L' || bytes[i + 2] == 'l') && + (bytes[i + 3] == 'S' || bytes[i + 3] == 's') && + (bytes[i + 4] == 'E' || bytes[i + 4] == 'e')) { + saveBool = false; + } else { + // No boolean value match for 4 char field. + return true; + } + } else if (isExtendedBooleanLiteral && fieldLength == 1) { + byte b = bytes[fieldStart]; + if (b == '1' || b == 't' || b == 'T') { + saveBool = true; + } else if (b == '0' || b == 'f' || b == 'F') { + saveBool = false; + } else { + // No boolean value match for extended 1 char field. + return true; + } + } else { + // No boolean value match for other lengths. 
+ return true; + } + } + break; + case BYTE: + try { + saveByte = LazyByte.parseByte(bytes, fieldStart, fieldLength, 10); + } catch (NumberFormatException e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "TINYINT"); + return true; + } +// if (!parseLongFast()) { +// return true; +// } +// saveShort = (short) saveLong; +// if (saveShort != saveLong) { +// return true; +// } + break; + case SHORT: + try { + saveShort = LazyShort.parseShort(bytes, fieldStart, fieldLength, 10); + } catch (NumberFormatException e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "SMALLINT"); + return true; + } +// if (!parseLongFast()) { +// return true; +// } +// saveShort = (short) saveLong; +// if (saveShort != saveLong) { +// return true; +// } + break; + case INT: + try { + saveInt = LazyInteger.parseInt(bytes, fieldStart, fieldLength, 10); + } catch (NumberFormatException e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "INT"); + return true; + } +// if (!parseLongFast()) { +// return true; +// } +// saveInt = (int) saveLong; +// if (saveInt != saveLong) { +// return true; +// } + break; + case LONG: + try { + saveLong = LazyLong.parseLong(bytes, fieldStart, fieldLength, 10); + } catch (NumberFormatException e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "BIGINT"); + return true; + } +// if (!parseLongFast()) { +// return true; +// } + break; + case FLOAT: + { + String byteData = null; + try { + byteData = Text.decode(bytes, fieldStart, fieldLength); + saveFloat = Float.parseFloat(byteData); + } catch (NumberFormatException e) { + LOG.debug("Data not in the Float data type range so converted to null. Given data is :" + + byteData, e); + return true; + } catch (CharacterCodingException e) { + LOG.debug("Data not in the Float data type range so converted to null.", e); + return true; + } + } +// if (!parseFloat()) { +// return true; +// } + break; + case DOUBLE: + { + String byteData = null; + try { + byteData = Text.decode(bytes, fieldStart, fieldLength); + saveDouble = Double.parseDouble(byteData); + } catch (NumberFormatException e) { + LOG.debug("Data not in the Double data type range so converted to null. Given data is :" + + byteData, e); + return true; + } catch (CharacterCodingException e) { + LOG.debug("Data not in the Double data type range so converted to null.", e); + return true; + } + } +// if (!parseDouble()) { +// return true; +// } + break; + + case STRING: + case CHAR: + case VARCHAR: + if (isEscaped) { + LazyUtils.copyAndEscapeStringDataToText(bytes, fieldStart, fieldLength, escapeChar, tempText); + saveBytes = tempText.getBytes(); + saveBytesStart = 0; + saveBytesLength = tempText.getLength(); + } else { + // if the data is not escaped, simply copy the data. + saveBytes = bytes; + saveBytesStart = fieldStart; + saveBytesLength = fieldLength; + } + break; + case BINARY: + { + byte[] recv = new byte[fieldLength]; + System.arraycopy(bytes, fieldStart, recv, 0, fieldLength); + byte[] decoded = LazyBinary.decodeIfNeeded(recv); + // use the original bytes in case decoding should fail + decoded = decoded.length > 0 ? 
decoded : recv; + saveBytes = decoded; + saveBytesStart = 0; + saveBytesLength = decoded.length; + } + break; + case DATE: + { + String s = null; + try { + s = Text.decode(bytes, fieldStart, fieldLength); + saveDate = Date.valueOf(s); + } catch (Exception e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "DATE"); + return true; + } + } +// if (!parseDate()) { +// return true; +// } + break; + case TIMESTAMP: + { + String s = null; + try { + s = new String(bytes, fieldStart, fieldLength, "US-ASCII"); + } catch (UnsupportedEncodingException e) { + LOG.error(e); + s = ""; + } + + if (s.compareTo("NULL") == 0) { + logExceptionMessage(bytes, fieldStart, fieldLength, "TIMESTAMP"); + return true; + } else { + try { + if (timestampParser == null) { + timestampParser = new TimestampParser(); + } + saveTimestamp = timestampParser.parseTimestamp(s); + } catch (IllegalArgumentException e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "TIMESTAMP"); + return true; + } + } + } +// if (!parseTimestamp()) { +// return true; +// } + break; + case INTERVAL_YEAR_MONTH: + { + String s = null; + try { + s = Text.decode(bytes, fieldStart, fieldLength); + saveIntervalYearMonth = HiveIntervalYearMonth.valueOf(s); + } catch (Exception e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "INTERVAL_YEAR_MONTH"); + return true; + } + } +// if (!parseIntervalYearMonth()) { +// return true; +// } + break; + case INTERVAL_DAY_TIME: + { + String s = null; + try { + s = Text.decode(bytes, fieldStart, fieldLength); + saveIntervalDayTime = HiveIntervalDayTime.valueOf(s); + } catch (Exception e) { + logExceptionMessage(bytes, fieldStart, fieldLength, "INTERVAL_DAY_TIME"); + return true; + } + } +// if (!parseIntervalDayTime()) { +// return true; +// } + break; + case DECIMAL: + { + String byteData = null; + try { + byteData = Text.decode(bytes, fieldStart, fieldLength); + } catch (CharacterCodingException e) { + LOG.debug("Data not in the HiveDecimal data type range so converted to null.", e); + return true; + } + + saveDecimal = HiveDecimal.create(byteData); + saveDecimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfos[fieldIndex]; + int precision = saveDecimalTypeInfo.getPrecision(); + int scale = saveDecimalTypeInfo.getScale(); + saveDecimal = HiveDecimalUtils.enforcePrecisionScale(saveDecimal, precision, scale); + if (saveDecimal == null) { + LOG.debug("Data not in the HiveDecimal data type range so converted to null. Given data is :" + + byteData); + return true; + } + } +// if (!parseDecimal()) { +// return true; +// } + break; + + default: + throw new Error("Unexpected primitive category " + primitiveTypeInfos[fieldIndex].getPrimitiveCategory()); + } + + return false; + } + + public void logExceptionMessage(byte[] bytes, int bytesStart, int bytesLength, String dataType) { + try { + if(LOG.isDebugEnabled()) { + String byteData = Text.decode(bytes, bytesStart, bytesLength); + LOG.debug("Data not in the " + dataType + + " data type range so converted to null. Given data is :" + + byteData, new Exception("For debugging purposes")); + } + } catch (CharacterCodingException e1) { + LOG.debug("Data not in the " + dataType + " data type range so converted to null.", e1); + } + } + + /* + * Call this method after all fields have been read to check for extra fields. + */ + public void extraFieldsCheck() { + if (offset < end) { + // We did not consume all of the byte range. + if (!bufferRangeHasExtraDataWarned) { + // Warn only once. 
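// Illustrative sketch, not part of the patch: the DECIMAL branch above in isolation.
// A parsed value is forced into the column's declared precision/scale; when it cannot
// be represented, the helper returns null and the reader reports the field as NULL.
// The exact rounding/overflow behavior belongs to HiveDecimalUtils; the value below
// is only illustrative.
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;

public class DecimalEnforceSketch {
  public static void main(String[] args) {
    HiveDecimal parsed = HiveDecimal.create("12345.678");
    // Same call the reader uses above: enforce DECIMAL(5,2) on the parsed value.
    HiveDecimal enforced = HiveDecimalUtils.enforcePrecisionScale(parsed, 5, 2);
    System.out.println(enforced); // expected: null, since 5 integer digits do not fit DECIMAL(5,2)
  }
}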
+ int length = end - start; + LOG.info("Not all fields were read in the buffer range! Buffer range " + start + + " for length " + length + " but reading more (NULLs returned)." + + " Ignoring similar problems."); + bufferRangeHasExtraDataWarned = true; + } + } + } + + /* + * Read integrity warning flags. + */ + @Override + public boolean readBeyondConfiguredFieldsWarned() { + return readBeyondConfiguredFieldsWarned; + } + @Override + public boolean readBeyondBufferRangeWarned() { + return readBeyondBufferRangeWarned; + } + @Override + public boolean bufferRangeHasExtraDataWarned() { + return bufferRangeHasExtraDataWarned; + } + + /* + * BOOLEAN. + */ + @Override + public boolean readBoolean() { + return saveBool; + } + + /* + * BYTE. + */ + @Override + public byte readByte() { + return saveByte; + } + + /* + * SHORT. + */ + @Override + public short readShort() { + return saveShort; + } + + /* + * INT. + */ + @Override + public int readInt() { + return saveInt; + } + + /* + * LONG. + */ + @Override + public long readLong() { + return saveLong; + } + + /* + * FLOAT. + */ + @Override + public float readFloat() { + return saveFloat; + } + + /* + * DOUBLE. + */ + @Override + public double readDouble() { + return saveDouble; + } + + /* + * STRING. + * + * Can be used to read CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. + */ + + // This class is for internal use. + private class LazySimpleReadStringResults extends ReadStringResults { + public LazySimpleReadStringResults() { + super(); + } + } + + // Reading a STRING field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadStringResults createReadStringResults() { + return new LazySimpleReadStringResults(); + } + + @Override + public void readString(ReadStringResults readStringResults) { + readStringResults.bytes = saveBytes; + readStringResults.start = saveBytesStart; + readStringResults.length = saveBytesLength; + } + + /* + * CHAR. + */ + + // This class is for internal use. + private static class LazySimpleReadHiveCharResults extends ReadHiveCharResults { + + // Use our STRING reader. + public LazySimpleReadStringResults readStringResults; + + public LazySimpleReadHiveCharResults() { + super(); + } + + public HiveCharWritable getHiveCharWritable() { + return hiveCharWritable; + } + } + + // Reading a CHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different CHAR field. + @Override + public ReadHiveCharResults createReadHiveCharResults() { + return new LazySimpleReadHiveCharResults(); + } + + public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException { + LazySimpleReadHiveCharResults LazySimpleReadHiveCharResults = (LazySimpleReadHiveCharResults) readHiveCharResults; + + if (!LazySimpleReadHiveCharResults.isInit()) { + LazySimpleReadHiveCharResults.init((CharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + if (LazySimpleReadHiveCharResults.readStringResults == null) { + LazySimpleReadHiveCharResults.readStringResults = new LazySimpleReadStringResults(); + } + LazySimpleReadStringResults readStringResults = LazySimpleReadHiveCharResults.readStringResults; + + // Read the bytes using our basic method. + readString(readStringResults); + + // Copy the bytes into our Text object, then truncate. 
+ HiveCharWritable hiveCharWritable = LazySimpleReadHiveCharResults.getHiveCharWritable(); + hiveCharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); + hiveCharWritable.enforceMaxLength(LazySimpleReadHiveCharResults.getMaxLength()); + + readHiveCharResults.bytes = hiveCharWritable.getTextValue().getBytes(); + readHiveCharResults.start = 0; + readHiveCharResults.length = hiveCharWritable.getTextValue().getLength(); + } + + /* + * VARCHAR. + */ + + // This class is for internal use. + private static class LazySimpleReadHiveVarcharResults extends ReadHiveVarcharResults { + + // Use our bytes reader. + public LazySimpleReadStringResults readStringResults; + + public LazySimpleReadHiveVarcharResults() { + super(); + } + + public HiveVarcharWritable getHiveVarcharWritable() { + return hiveVarcharWritable; + } + } + + // Reading a VARCHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different VARCHAR field. + @Override + public ReadHiveVarcharResults createReadHiveVarcharResults() { + return new LazySimpleReadHiveVarcharResults(); + } + + public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException { + LazySimpleReadHiveVarcharResults lazySimpleReadHiveVarvarcharResults = (LazySimpleReadHiveVarcharResults) readHiveVarcharResults; + + if (!lazySimpleReadHiveVarvarcharResults.isInit()) { + lazySimpleReadHiveVarvarcharResults.init((VarcharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + if (lazySimpleReadHiveVarvarcharResults.readStringResults == null) { + lazySimpleReadHiveVarvarcharResults.readStringResults = new LazySimpleReadStringResults(); + } + LazySimpleReadStringResults readStringResults = lazySimpleReadHiveVarvarcharResults.readStringResults; + + // Read the bytes using our basic method. + readString(readStringResults); + + // Copy the bytes into our Text object, then truncate. + HiveVarcharWritable hiveVarcharWritable = lazySimpleReadHiveVarvarcharResults.getHiveVarcharWritable(); + hiveVarcharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); + hiveVarcharWritable.enforceMaxLength(lazySimpleReadHiveVarvarcharResults.getMaxLength()); + + readHiveVarcharResults.bytes = hiveVarcharWritable.getTextValue().getBytes(); + readHiveVarcharResults.start = 0; + readHiveVarcharResults.length = hiveVarcharWritable.getTextValue().getLength(); + } + + /* + * BINARY. + */ + + // This class is for internal use. + private class LazySimpleReadBinaryResults extends ReadBinaryResults { + public LazySimpleReadBinaryResults() { + super(); + } + } + + // Reading a BINARY field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadBinaryResults createReadBinaryResults() { + return new LazySimpleReadBinaryResults(); + } + + @Override + public void readBinary(ReadBinaryResults readBinaryResults) { + readBinaryResults.bytes = saveBytes; + readBinaryResults.start = saveBytesStart; + readBinaryResults.length = saveBytesLength; + } + + /* + * DATE. + */ + + // This class is for internal use. 
+ private static class LazySimpleReadDateResults extends ReadDateResults { + + public LazySimpleReadDateResults() { + super(); + } + + public DateWritable getDateWritable() { + return dateWritable; + } + } + + // Reading a DATE field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DATE field. + @Override + public ReadDateResults createReadDateResults() { + return new LazySimpleReadDateResults(); + } + + @Override + public void readDate(ReadDateResults readDateResults) { + LazySimpleReadDateResults lazySimpleReadDateResults = (LazySimpleReadDateResults) readDateResults; + + DateWritable dateWritable = lazySimpleReadDateResults.getDateWritable(); + dateWritable.set(saveDate); + saveDate = null; + } + + + /* + * INTERVAL_YEAR_MONTH. + */ + + // This class is for internal use. + private static class LazySimpleReadIntervalYearMonthResults extends ReadIntervalYearMonthResults { + + public LazySimpleReadIntervalYearMonthResults() { + super(); + } + + public HiveIntervalYearMonthWritable getHiveIntervalYearMonthWritable() { + return hiveIntervalYearMonthWritable; + } + } + + // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_YEAR_MONTH field. + @Override + public ReadIntervalYearMonthResults createReadIntervalYearMonthResults() { + return new LazySimpleReadIntervalYearMonthResults(); + } + + @Override + public void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResults) + throws IOException { + LazySimpleReadIntervalYearMonthResults lazySimpleReadIntervalYearMonthResults = + (LazySimpleReadIntervalYearMonthResults) readIntervalYearMonthResults; + + HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable = + lazySimpleReadIntervalYearMonthResults.getHiveIntervalYearMonthWritable(); + hiveIntervalYearMonthWritable.set(saveIntervalYearMonth); + saveIntervalYearMonth = null; + } + + /* + * INTERVAL_DAY_TIME. + */ + + // This class is for internal use. + private static class LazySimpleReadIntervalDayTimeResults extends ReadIntervalDayTimeResults { + + public LazySimpleReadIntervalDayTimeResults() { + super(); + } + + public HiveIntervalDayTimeWritable getHiveIntervalDayTimeWritable() { + return hiveIntervalDayTimeWritable; + } + } + + // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_DAY_TIME field. + @Override + public ReadIntervalDayTimeResults createReadIntervalDayTimeResults() { + return new LazySimpleReadIntervalDayTimeResults(); + } + + @Override + public void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResults) + throws IOException { + LazySimpleReadIntervalDayTimeResults lazySimpleReadIntervalDayTimeResults = + (LazySimpleReadIntervalDayTimeResults) readIntervalDayTimeResults; + + HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable = + lazySimpleReadIntervalDayTimeResults.getHiveIntervalDayTimeWritable(); + hiveIntervalDayTimeWritable.set(saveIntervalDayTime); + saveIntervalDayTime = null; + } + + /* + * TIMESTAMP. + */ + + // This class is for internal use. 
+ private static class LazySimpleReadTimestampResults extends ReadTimestampResults { + + public LazySimpleReadTimestampResults() { + super(); + } + + public TimestampWritable getTimestampWritable() { + return timestampWritable; + } + } + + // Reading a TIMESTAMP field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different TIMESTAMP field. + @Override + public ReadTimestampResults createReadTimestampResults() { + return new LazySimpleReadTimestampResults(); + } + + @Override + public void readTimestamp(ReadTimestampResults readTimestampResults) { + LazySimpleReadTimestampResults lazySimpleReadTimestampResults = + (LazySimpleReadTimestampResults) readTimestampResults; + + TimestampWritable timestampWritable = lazySimpleReadTimestampResults.getTimestampWritable(); + timestampWritable.set(saveTimestamp); + saveTimestamp = null; + } + + /* + * DECIMAL. + */ + + // This class is for internal use. + private static class LazySimpleReadDecimalResults extends ReadDecimalResults { + + HiveDecimal hiveDecimal; + + public LazySimpleReadDecimalResults() { + super(); + } + + @Override + public HiveDecimal getHiveDecimal() { + return hiveDecimal; + } + } + + // Reading a DECIMAL field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DECIMAL field. + @Override + public ReadDecimalResults createReadDecimalResults() { + return new LazySimpleReadDecimalResults(); + } + + @Override + public void readHiveDecimal(ReadDecimalResults readDecimalResults) { + LazySimpleReadDecimalResults lazySimpleReadDecimalResults = (LazySimpleReadDecimalResults) readDecimalResults; + + if (!lazySimpleReadDecimalResults.isInit()) { + lazySimpleReadDecimalResults.init(saveDecimalTypeInfo); + } + + lazySimpleReadDecimalResults.hiveDecimal = saveDecimal; + + saveDecimal = null; + saveDecimalTypeInfo = null; + } + + private static byte[] maxLongBytes = ((Long) Long.MAX_VALUE).toString().getBytes(); + private static int maxLongDigitsCount = maxLongBytes.length; + private static byte[] minLongNoSignBytes = ((Long) Long.MIN_VALUE).toString().substring(1).getBytes(); + + private boolean parseLongFast() { + + // Parse without using exceptions for better performance. + int i = fieldStart; + int end = fieldStart + fieldLength; + boolean negative = false; + if (i >= end) { + return false; // Empty field. + } + if (bytes[i] == '+') { + i++; + if (i >= end) { + return false; + } + } else if (bytes[i] == '-') { + negative = true; + i++; + if (i >= end) { + return false; + } + } + // Skip leading zeros. + boolean atLeastOneZero = false; + while (true) { + if (bytes[i] != '0') { + break; + } + i++; + if (i >= end) { + saveLong = 0; + return true; + } + atLeastOneZero = true; + } + // We tolerate and ignore decimal places. + if (bytes[i] == '.') { + if (!atLeastOneZero) { + return false; + } + saveLong = 0; + // Fall through below and verify trailing decimal digits. + } else { + if (!Character.isDigit(bytes[i])) { + return false; + } + int nonLeadingZeroStart = i; + int digitCount = 1; + saveLong = Character.digit(bytes[i], 10); + i++; + while (i < end) { + if (!Character.isDigit(bytes[i])) { + break; + } + digitCount++; + if (digitCount > maxLongDigitsCount) { + return false; + } else if (digitCount == maxLongDigitsCount) { + // Use the old trick of comparing against number string to check for overflow. 
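// Illustrative sketch, not part of the patch, of the "old trick" referenced just above:
// once a candidate has as many digits as Long.MAX_VALUE, a plain lexicographic
// comparison against the textual limit detects overflow without throwing. This works
// only because both strings have the same length and no leading zeros or sign, which
// is exactly what the digit-count guard and byteArrayCompareRanges ensure.
// fitsInLong is a hypothetical helper, not a method in the patch.
public class OverflowTrickSketch {
  private static final String MAX = Long.toString(Long.MAX_VALUE);                       // "9223372036854775807"
  private static final String MIN_NO_SIGN = Long.toString(Long.MIN_VALUE).substring(1);  // "9223372036854775808"

  static boolean fitsInLong(String digits, boolean negative) {
    String limit = negative ? MIN_NO_SIGN : MAX;
    if (digits.length() != limit.length()) {
      return digits.length() < limit.length();
    }
    // Equal-length digit strings compare lexicographically the same as numerically.
    return digits.compareTo(limit) <= 0;
  }

  public static void main(String[] args) {
    System.out.println(fitsInLong("9223372036854775807", false)); // true
    System.out.println(fitsInLong("9223372036854775808", false)); // false (overflow)
    System.out.println(fitsInLong("9223372036854775808", true));  // true  (Long.MIN_VALUE)
  }
}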
+ if (!negative) { + if (byteArrayCompareRanges(bytes, nonLeadingZeroStart, maxLongBytes, 0, digitCount) >= 1) { + return false; + } + } else { + if (byteArrayCompareRanges(bytes, nonLeadingZeroStart, minLongNoSignBytes, 0, digitCount) >= 1) { + return false; + } + } + } + saveLong = (saveLong * 10) + Character.digit(bytes[i], 10); + } + if (negative) { + // Safe because of our number string comparision against min (negative) long. + saveLong = -saveLong; + } + if (i >= end) { + return true; + } + if (bytes[i] != '.') { + return false; + } + } + // Fall through to here if we detect the start of trailing decimal digits... + // We verify trailing digits only. + while (true) { + i++; + if (i >= end) { + break; + } + if (!Character.isDigit(bytes[i])) { + return false; + } + } + return true; + } + + public static int byteArrayCompareRanges(byte[] arg1, int start1, byte[] arg2, int start2, int len) { + for (int i = 0; i < len; i++) { + // Note the "& 0xff" is just a way to convert unsigned bytes to signed integer. + int b1 = arg1[i + start1] & 0xff; + int b2 = arg2[i + start2] & 0xff; + if (b1 != b2) { + return b1 - b2; + } + } + return 0; + } + +} \ No newline at end of file Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java (working copy) @@ -0,0 +1,510 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2.lazy.fast; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; +import java.sql.Date; +import java.sql.Timestamp; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazy.LazyDate; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveDecimal; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalDayTime; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalYearMonth; +import org.apache.hadoop.hive.serde2.lazy.LazyInteger; +import org.apache.hadoop.hive.serde2.lazy.LazyLong; +import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateUtils; + +/* + * Directly serialize, field-by-field, the LazyBinary format. +* + * This is an alternative way to serialize than what is provided by LazyBinarySerDe. + */ +public class LazySimpleSerializeWrite implements SerializeWrite { + public static final Log LOG = LogFactory.getLog(LazySimpleSerializeWrite.class.getName()); + + private LazySerDeParameters lazyParams; + + private byte separator; + private boolean[] needsEscape; + private boolean isEscaped; + private byte escapeChar; + private byte[] nullSequenceBytes; + + private Output output; + + private int fieldCount; + private int index; + + // For thread safety, we allocate private writable objects for our use only. 
+ private DateWritable dateWritable; + private TimestampWritable timestampWritable; + private HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable; + private HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable; + private HiveIntervalDayTime hiveIntervalDayTime; + + public LazySimpleSerializeWrite(int fieldCount, + byte separator, LazySerDeParameters lazyParams) { + + this(); + this.fieldCount = fieldCount; + + this.separator = separator; + this.lazyParams = lazyParams; + + isEscaped = lazyParams.isEscaped(); + escapeChar = lazyParams.getEscapeChar(); + needsEscape = lazyParams.getNeedsEscape(); + nullSequenceBytes = lazyParams.getNullSequence().getBytes(); + } + + // Not public since we must have the field count and other information. + private LazySimpleSerializeWrite() { + } + + /* + * Set the buffer that will receive the serialized data. + */ + @Override + public void set(Output output) { + this.output = output; + output.reset(); + index = 0; + } + + /* + * Reset the previously supplied buffer that will receive the serialized data. + */ + @Override + public void reset() { + output.reset(); + index = 0; + } + + /* + * General Pattern: + * + * if (index > 0) { + * output.write(separator); + * } + * + * WHEN NOT NULL: Write value. + * OTHERWISE NULL: Write nullSequenceBytes. + * + * Increment index + * + */ + + /* + * Write a NULL field. + */ + @Override + public void writeNull() throws IOException { + + if (index > 0) { + output.write(separator); + } + + output.write(nullSequenceBytes); + + index++; + } + + /* + * BOOLEAN. + */ + @Override + public void writeBoolean(boolean v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (v) { + output.write(LazyUtils.trueBytes, 0, LazyUtils.trueBytes.length); + } else { + output.write(LazyUtils.falseBytes, 0, LazyUtils.falseBytes.length); + } + + index++; + } + + /* + * BYTE. + */ + @Override + public void writeByte(byte v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyInteger.writeUTF8(output, v); + + index++; + } + + /* + * SHORT. + */ + @Override + public void writeShort(short v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyInteger.writeUTF8(output, v); + + index++; + } + + /* + * INT. + */ + @Override + public void writeInt(int v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyInteger.writeUTF8(output, v); + + index++; + } + + /* + * LONG. + */ + @Override + public void writeLong(long v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyLong.writeUTF8(output, v); + + index++; + } + + /* + * FLOAT. + */ + @Override + public void writeFloat(float vf) throws IOException { + + if (index > 0) { + output.write(separator); + } + + ByteBuffer b = Text.encode(String.valueOf(vf)); + output.write(b.array(), 0, b.limit()); + + index++; + } + + /* + * DOUBLE. + */ + @Override + public void writeDouble(double v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + ByteBuffer b = Text.encode(String.valueOf(v)); + output.write(b.array(), 0, b.limit()); + + index++; + } + + /* + * STRING. + * + * Can be used to write CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. 
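+ *
+ * Values are written through LazyUtils.writeEscaped, so bytes flagged in needsEscape are
+ * escaped with escapeChar when escaping is enabled.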
+ */ + @Override + public void writeString(byte[] v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyUtils.writeEscaped(output, v, 0, v.length, isEscaped, escapeChar, + needsEscape); + + index++; + } + + @Override + public void writeString(byte[] v, int start, int length) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyUtils.writeEscaped(output, v, start, length, isEscaped, escapeChar, + needsEscape); + + index++; + } + + /* + * CHAR. + */ + @Override + public void writeHiveChar(HiveChar hiveChar) throws IOException { + + if (index > 0) { + output.write(separator); + } + + ByteBuffer b = Text.encode(hiveChar.getPaddedValue()); + LazyUtils.writeEscaped(output, b.array(), 0, b.limit(), isEscaped, escapeChar, + needsEscape); + + index++; + } + + /* + * VARCHAR. + */ + @Override + public void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException { + + if (index > 0) { + output.write(separator); + } + + ByteBuffer b = Text.encode(hiveVarchar.getValue()); + LazyUtils.writeEscaped(output, b.array(), 0, b.limit(), isEscaped, escapeChar, + needsEscape); + + index++; + } + + /* + * BINARY. + */ + @Override + public void writeBinary(byte[] v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + byte[] toEncode = new byte[v.length]; + System.arraycopy(v, 0, toEncode, 0, v.length); + byte[] toWrite = Base64.encodeBase64(toEncode); + output.write(toWrite, 0, toWrite.length); + + index++; + } + + @Override + public void writeBinary(byte[] v, int start, int length) throws IOException { + + if (index > 0) { + output.write(separator); + } + + byte[] toEncode = new byte[length]; + System.arraycopy(v, start, toEncode, 0, length); + byte[] toWrite = Base64.encodeBase64(toEncode); + output.write(toWrite, 0, toWrite.length); + + index++; + } + + /* + * DATE. + */ + @Override + public void writeDate(Date date) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (dateWritable == null) { + dateWritable = new DateWritable(); + } + dateWritable.set(date); + LazyDate.writeUTF8(output, dateWritable); + + index++; + } + + // We provide a faster way to write a date without a Date object. + @Override + public void writeDate(int dateAsDays) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (dateWritable == null) { + dateWritable = new DateWritable(); + } + dateWritable.set(dateAsDays); + LazyDate.writeUTF8(output, dateWritable); + + index++; + } + + /* + * TIMESTAMP. + */ + @Override + public void writeTimestamp(Timestamp v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (timestampWritable == null) { + timestampWritable = new TimestampWritable(); + } + timestampWritable.set(v); + LazyTimestamp.writeUTF8(output, timestampWritable); + + index++; + } + + /* + * INTERVAL_YEAR_MONTH. 
+ */ + @Override + public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (hiveIntervalYearMonthWritable == null) { + hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); + } + hiveIntervalYearMonthWritable.set(viyt); + LazyHiveIntervalYearMonth.writeUTF8(output, hiveIntervalYearMonthWritable); + + index++; + } + + + @Override + public void writeHiveIntervalYearMonth(int totalMonths) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (hiveIntervalYearMonthWritable == null) { + hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); + } + hiveIntervalYearMonthWritable.set(totalMonths); + LazyHiveIntervalYearMonth.writeUTF8(output, hiveIntervalYearMonthWritable); + + index++; + } + + /* + * INTERVAL_DAY_TIME. + */ + @Override + public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (hiveIntervalDayTimeWritable == null) { + hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); + } + hiveIntervalDayTimeWritable.set(vidt); + LazyHiveIntervalDayTime.writeUTF8(output, hiveIntervalDayTimeWritable); + + index++; + } + + @Override + public void writeHiveIntervalDayTime(long totalNanos) throws IOException { + + if (index > 0) { + output.write(separator); + } + + if (hiveIntervalDayTime == null) { + hiveIntervalDayTime = new HiveIntervalDayTime(); + } + if (hiveIntervalDayTimeWritable == null) { + hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); + } + DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, totalNanos); + hiveIntervalDayTimeWritable.set(hiveIntervalDayTime); + LazyHiveIntervalDayTime.writeUTF8(output, hiveIntervalDayTimeWritable); + + index++; + } + + /* + * DECIMAL. + */ + @Override + public void writeHiveDecimal(HiveDecimal v) throws IOException { + + if (index > 0) { + output.write(separator); + } + + LazyHiveDecimal.writeUTF8(output, v); + + index++; + } +} \ No newline at end of file Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java (working copy) @@ -166,15 +166,18 @@ // Extra bytes at the end? if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) { extraFieldWarned = true; - LOG.warn("Extra bytes detected at the end of the row! Ignoring similar " - + "problems."); + LOG.warn("Extra bytes detected at the end of the row! " + + "Last field end " + lastFieldByteEnd + " and serialize buffer end " + structByteEnd + ". " + + "Ignoring similar problems."); } // Missing fields? if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) { missingFieldWarned = true; - LOG.info("Missing fields! Expected " + fields.length + " fields but " - + "only got " + fieldId + "! Ignoring similar problems."); + LOG.info("Missing fields! Expected " + fields.length + " fields but " + + "only got " + fieldId + "! " + + "Last field end " + lastFieldByteEnd + " and serialize buffer end " + structByteEnd + ". 
" + + "Ignoring similar problems."); } Arrays.fill(fieldInited, false); Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java (working copy) @@ -0,0 +1,942 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazybinary.fast; + +import java.io.EOFException; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; + +/* + * Directly deserialize with the caller reading field-by-field the LazyBinary serialization format. + * + * The caller is responsible for calling the read method for the right type of each field + * (after calling readCheckNull). + * + * Reading some fields require a results object to receive value information. A separate + * results object is created by the caller at initialization per different field even for the same + * type. + * + * Some type values are by reference to either bytes in the deserialization buffer or to + * other type specific buffers. So, those references are only valid until the next time set is + * called. 
+ */ +public class LazyBinaryDeserializeRead implements DeserializeRead { + public static final Log LOG = LogFactory.getLog(LazyBinaryDeserializeRead.class.getName()); + + private PrimitiveTypeInfo[] primitiveTypeInfos; + + private byte[] bytes; + private int start; + private int offset; + private int end; + private int fieldCount; + private int fieldIndex; + private byte nullByte; + + private DecimalTypeInfo saveDecimalTypeInfo; + private HiveDecimal saveDecimal; + + // Object to receive results of reading a decoded variable length int or long. + private VInt tempVInt; + private VLong tempVLong; + private HiveDecimalWritable tempHiveDecimalWritable; + + private boolean readBeyondConfiguredFieldsWarned; + private boolean readBeyondBufferRangeWarned; + private boolean bufferRangeHasExtraDataWarned; + + public LazyBinaryDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos) { + this.primitiveTypeInfos = primitiveTypeInfos; + fieldCount = primitiveTypeInfos.length; + tempVInt = new VInt(); + tempVLong = new VLong(); + readBeyondConfiguredFieldsWarned = false; + readBeyondBufferRangeWarned = false; + bufferRangeHasExtraDataWarned = false; + } + + // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. + private LazyBinaryDeserializeRead() { + } + + /* + * The primitive type information for all fields. + */ + public PrimitiveTypeInfo[] primitiveTypeInfos() { + return primitiveTypeInfos; + } + + /* + * Set the range of bytes to be deserialized. + */ + @Override + public void set(byte[] bytes, int offset, int length) { + this.bytes = bytes; + this.offset = offset; + start = offset; + end = offset + length; + fieldIndex = 0; + } + + /* + * Reads the NULL information for a field. + * + * @return Returns true when the field is NULL; reading is positioned to the next field. + * Otherwise, false when the field is NOT NULL; reading is positioned to the field data. + */ + @Override + public boolean readCheckNull() throws IOException { + if (fieldIndex >= fieldCount) { + // Reading beyond the specified field count produces NULL. + if (!readBeyondConfiguredFieldsWarned) { + // Warn only once. + LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but " + + " reading more (NULLs returned). Ignoring similar problems."); + readBeyondConfiguredFieldsWarned = true; + } + return true; + } + + if (fieldIndex == 0) { + // The rest of the range check for fields after the first is below after checking + // the NULL byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + + // NOTE: The bit is set to 1 if a field is NOT NULL. + if ((nullByte & (1 << (fieldIndex % 8))) != 0) { + + // Make sure there is at least one byte that can be read for a value. + if (offset >= end) { + // Careful: since we may be dealing with NULLs in the final NULL byte, we check after + // the NULL byte check.. + warnBeyondEof(); + } + + // We have a field and are positioned to it. + + if (primitiveTypeInfos[fieldIndex].getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { + return false; + } + + // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read + // it here. + return earlyReadHiveDecimal(); + } + + // When NULL, we need to move past this field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. 
+ if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return true; + } + + /* + * Call this method after all fields have been read to check for extra fields. + */ + public void extraFieldsCheck() { + if (offset < end) { + // We did not consume all of the byte range. + if (!bufferRangeHasExtraDataWarned) { + // Warn only once. + int length = end - start; + int remaining = end - offset; + LOG.info("Not all fields were read in the buffer range! Buffer range " + start + + " for length " + length + " but " + remaining + " bytes remain. " + + "(total buffer length " + bytes.length + ")" + + " Ignoring similar problems."); + bufferRangeHasExtraDataWarned = true; + } + } + } + + /* + * Read integrity warning flags. + */ + @Override + public boolean readBeyondConfiguredFieldsWarned() { + return readBeyondConfiguredFieldsWarned; + } + @Override + public boolean readBeyondBufferRangeWarned() { + return readBeyondBufferRangeWarned; + } + @Override + public boolean bufferRangeHasExtraDataWarned() { + return bufferRangeHasExtraDataWarned; + } + + private void warnBeyondEof() throws EOFException { + if (!readBeyondBufferRangeWarned) { + // Warn only once. + int length = end - start; + LOG.info("Reading beyond buffer range! Buffer range " + start + + " for length " + length + " but reading more... " + + "(total buffer length " + bytes.length + ")" + + " Ignoring similar problems."); + readBeyondBufferRangeWarned = true; + } + } + + /* + * BOOLEAN. + */ + @Override + public boolean readBoolean() throws IOException { + // No check needed for single byte read. + byte result = bytes[offset++]; + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return (result != 0); + } + + /* + * BYTE. + */ + @Override + public byte readByte() throws IOException { + // No check needed for single byte read. + byte result = bytes[offset++]; + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + if ((fieldIndex % 8) == 0) { + nullByte = bytes[offset++]; + } + } + + return result; + } + + /* + * SHORT. + */ + @Override + public short readShort() throws IOException { + // Last item -- ok to be at end. + if (offset + 2 > end) { + warnBeyondEof(); + } + short result = LazyBinaryUtils.byteArrayToShort(bytes, offset); + offset += 2; + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return result; + } + + /* + * INT. + */ + @Override + public int readInt() throws IOException { + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return tempVInt.value; + } + + /* + * LONG. 
+ */ + @Override + public long readLong() throws IOException { + LazyBinaryUtils.readVLong(bytes, offset, tempVLong); + offset += tempVLong.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return tempVLong.value; + } + + /* + * FLOAT. + */ + @Override + public float readFloat() throws IOException { + // Last item -- ok to be at end. + if (offset + 4 > end) { + warnBeyondEof(); + } + float result = Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, offset)); + offset += 4; + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return result; + } + + /* + * DOUBLE. + */ + @Override + public double readDouble() throws IOException { + // Last item -- ok to be at end. + if (offset + 8 > end) { + warnBeyondEof(); + } + double result = Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, offset)); + offset += 8; + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + return result; + } + + /* + * STRING. + * + * Can be used to read CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. + */ + + // This class is for internal use. + private class LazyBinaryReadStringResults extends ReadStringResults { + public LazyBinaryReadStringResults() { + super(); + } + } + + // Reading a STRING field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadStringResults createReadStringResults() { + return new LazyBinaryReadStringResults(); + } + + @Override + public void readString(ReadStringResults readStringResults) throws IOException { + // using vint instead of 4 bytes + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Could be last item for empty string -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + int saveStart = offset; + int length = tempVInt.value; + offset += length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + readStringResults.bytes = bytes; + readStringResults.start = saveStart; + readStringResults.length = length; + } + + /* + * CHAR. + */ + + // This class is for internal use. + private static class LazyBinaryReadHiveCharResults extends ReadHiveCharResults { + + // Use our STRING reader. 
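+    // CHAR values are first read as raw string bytes; readHiveChar() then applies enforceMaxLength.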
+ public LazyBinaryReadStringResults readStringResults; + + public LazyBinaryReadHiveCharResults() { + super(); + } + + public HiveCharWritable getHiveCharWritable() { + return hiveCharWritable; + } + } + + // Reading a CHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different CHAR field. + @Override + public ReadHiveCharResults createReadHiveCharResults() { + return new LazyBinaryReadHiveCharResults(); + } + + public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException { + LazyBinaryReadHiveCharResults lazyBinaryReadHiveCharResults = (LazyBinaryReadHiveCharResults) readHiveCharResults; + + if (!lazyBinaryReadHiveCharResults.isInit()) { + lazyBinaryReadHiveCharResults.init((CharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + if (lazyBinaryReadHiveCharResults.readStringResults == null) { + lazyBinaryReadHiveCharResults.readStringResults = new LazyBinaryReadStringResults(); + } + LazyBinaryReadStringResults readStringResults = lazyBinaryReadHiveCharResults.readStringResults; + + // Read the bytes using our basic method. + readString(readStringResults); + + // Copy the bytes into our Text object, then truncate. + HiveCharWritable hiveCharWritable = lazyBinaryReadHiveCharResults.getHiveCharWritable(); + hiveCharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); + hiveCharWritable.enforceMaxLength(lazyBinaryReadHiveCharResults.getMaxLength()); + + readHiveCharResults.bytes = hiveCharWritable.getTextValue().getBytes(); + readHiveCharResults.start = 0; + readHiveCharResults.length = hiveCharWritable.getTextValue().getLength(); + } + + /* + * VARCHAR. + */ + + // This class is for internal use. + private static class LazyBinaryReadHiveVarcharResults extends ReadHiveVarcharResults { + + // Use our STRING reader. + public LazyBinaryReadStringResults readStringResults; + + public LazyBinaryReadHiveVarcharResults() { + super(); + } + + public HiveVarcharWritable getHiveVarcharWritable() { + return hiveVarcharWritable; + } + } + + // Reading a VARCHAR field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different VARCHAR field. + @Override + public ReadHiveVarcharResults createReadHiveVarcharResults() { + return new LazyBinaryReadHiveVarcharResults(); + } + + public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException { + LazyBinaryReadHiveVarcharResults lazyBinaryReadHiveVarcharResults = (LazyBinaryReadHiveVarcharResults) readHiveVarcharResults; + + if (!lazyBinaryReadHiveVarcharResults.isInit()) { + lazyBinaryReadHiveVarcharResults.init((VarcharTypeInfo) primitiveTypeInfos[fieldIndex]); + } + + if (lazyBinaryReadHiveVarcharResults.readStringResults == null) { + lazyBinaryReadHiveVarcharResults.readStringResults = new LazyBinaryReadStringResults(); + } + LazyBinaryReadStringResults readStringResults = lazyBinaryReadHiveVarcharResults.readStringResults; + + // Read the bytes using our basic method. + readString(readStringResults); + + // Copy the bytes into our Text object, then truncate. 
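+    // enforceMaxLength below trims the value to the VARCHAR type's declared maximum length.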
+ HiveVarcharWritable hiveVarcharWritable = lazyBinaryReadHiveVarcharResults.getHiveVarcharWritable(); + hiveVarcharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); + hiveVarcharWritable.enforceMaxLength(lazyBinaryReadHiveVarcharResults.getMaxLength()); + + readHiveVarcharResults.bytes = hiveVarcharWritable.getTextValue().getBytes(); + readHiveVarcharResults.start = 0; + readHiveVarcharResults.length = hiveVarcharWritable.getTextValue().getLength(); + } + + /* + * BINARY. + */ + + // This class is for internal use. + private class LazyBinaryReadBinaryResults extends ReadBinaryResults { + + // Use our STRING reader. + public LazyBinaryReadStringResults readStringResults; + + public LazyBinaryReadBinaryResults() { + super(); + } + } + + // Reading a BINARY field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different bytes field. + @Override + public ReadBinaryResults createReadBinaryResults() { + return new LazyBinaryReadBinaryResults(); + } + + public void readBinary(ReadBinaryResults readBinaryResults) throws IOException { + LazyBinaryReadBinaryResults lazyBinaryReadBinaryResults = (LazyBinaryReadBinaryResults) readBinaryResults; + + if (lazyBinaryReadBinaryResults.readStringResults == null) { + lazyBinaryReadBinaryResults.readStringResults = new LazyBinaryReadStringResults(); + } + LazyBinaryReadStringResults readStringResults = lazyBinaryReadBinaryResults.readStringResults; + + // Read the bytes using our basic method. + readString(readStringResults); + + readBinaryResults.bytes = readStringResults.bytes; + readBinaryResults.start = readStringResults.start; + readBinaryResults.length = readStringResults.length; + } + + /* + * DATE. + */ + + // This class is for internal use. + private static class LazyBinaryReadDateResults extends ReadDateResults { + + public LazyBinaryReadDateResults() { + super(); + } + + public DateWritable getDateWritable() { + return dateWritable; + } + } + + // Reading a DATE field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DATE field. + @Override + public ReadDateResults createReadDateResults() { + return new LazyBinaryReadDateResults(); + } + + @Override + public void readDate(ReadDateResults readDateResults) throws IOException { + LazyBinaryReadDateResults lazyBinaryReadDateResults = (LazyBinaryReadDateResults) readDateResults; + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + DateWritable dateWritable = lazyBinaryReadDateResults.getDateWritable(); + dateWritable.set(tempVInt.value); + } + + /* + * INTERVAL_YEAR_MONTH. + */ + + // This class is for internal use. 
+ private static class LazyBinaryReadIntervalYearMonthResults extends ReadIntervalYearMonthResults { + + public LazyBinaryReadIntervalYearMonthResults() { + super(); + } + + public HiveIntervalYearMonthWritable getHiveIntervalYearMonthWritable() { + return hiveIntervalYearMonthWritable; + } + } + + // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_YEAR_MONTH field. + @Override + public ReadIntervalYearMonthResults createReadIntervalYearMonthResults() { + return new LazyBinaryReadIntervalYearMonthResults(); + } + + @Override + public void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResults) + throws IOException { + LazyBinaryReadIntervalYearMonthResults lazyBinaryReadIntervalYearMonthResults = + (LazyBinaryReadIntervalYearMonthResults) readIntervalYearMonthResults; + + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable = + lazyBinaryReadIntervalYearMonthResults.getHiveIntervalYearMonthWritable(); + hiveIntervalYearMonthWritable.set(tempVInt.value); + } + + /* + * INTERVAL_DAY_TIME. + */ + + // This class is for internal use. + private static class LazyBinaryReadIntervalDayTimeResults extends ReadIntervalDayTimeResults { + + public LazyBinaryReadIntervalDayTimeResults() { + super(); + } + + public HiveIntervalDayTimeWritable getHiveIntervalDayTimeWritable() { + return hiveIntervalDayTimeWritable; + } + } + + // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. + // A separate results object is created by the caller at initialization per different + // INTERVAL_DAY_TIME field. + @Override + public ReadIntervalDayTimeResults createReadIntervalDayTimeResults() { + return new LazyBinaryReadIntervalDayTimeResults(); + } + + @Override + public void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResults) + throws IOException { + LazyBinaryReadIntervalDayTimeResults lazyBinaryReadIntervalDayTimeResults = + (LazyBinaryReadIntervalDayTimeResults) readIntervalDayTimeResults; + LazyBinaryUtils.readVLong(bytes, offset, tempVLong); + offset += tempVLong.length; + if (offset >= end) { + // Overshoot or not enough for next item. + warnBeyondEof(); + } + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable = + lazyBinaryReadIntervalDayTimeResults.getHiveIntervalDayTimeWritable(); + hiveIntervalDayTimeWritable.set(tempVLong.value, tempVInt.value); + } + + /* + * TIMESTAMP. + */ + + // This class is for internal use. 
+ private static class LazyBinaryReadTimestampResults extends ReadTimestampResults { + + public LazyBinaryReadTimestampResults() { + super(); + } + + public TimestampWritable getTimestampWritable() { + return timestampWritable; + } + } + + // Reading a TIMESTAMP field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different TIMESTAMP field. + @Override + public ReadTimestampResults createReadTimestampResults() { + return new LazyBinaryReadTimestampResults(); + } + + @Override + public void readTimestamp(ReadTimestampResults readTimestampResults) throws IOException { + LazyBinaryReadTimestampResults lazyBinaryReadTimestampResults = (LazyBinaryReadTimestampResults) readTimestampResults; + int length = TimestampWritable.getTotalLength(bytes, offset); + int saveStart = offset; + offset += length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + // Move past this NOT NULL field. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + TimestampWritable timestampWritable = lazyBinaryReadTimestampResults.getTimestampWritable(); + timestampWritable.set(bytes, saveStart); + } + + /* + * DECIMAL. + */ + + // This class is for internal use. + private static class LazyBinaryReadDecimalResults extends ReadDecimalResults { + + public HiveDecimal hiveDecimal; + + public void init(DecimalTypeInfo decimalTypeInfo) { + super.init(decimalTypeInfo); + } + + @Override + public HiveDecimal getHiveDecimal() { + return hiveDecimal; + } + } + + // Reading a DECIMAL field require a results object to receive value information. A separate + // results object is created by the caller at initialization per different DECIMAL field. + @Override + public ReadDecimalResults createReadDecimalResults() { + return new LazyBinaryReadDecimalResults(); + } + + @Override + public void readHiveDecimal(ReadDecimalResults readDecimalResults) throws IOException { + LazyBinaryReadDecimalResults lazyBinaryReadDecimalResults = (LazyBinaryReadDecimalResults) readDecimalResults; + + if (!lazyBinaryReadDecimalResults.isInit()) { + lazyBinaryReadDecimalResults.init(saveDecimalTypeInfo); + } + + lazyBinaryReadDecimalResults.hiveDecimal = saveDecimal; + + saveDecimal = null; + saveDecimalTypeInfo = null; + } + + /** + * We read the whole HiveDecimal value and then enforce precision and scale, which may + * make it a NULL. + * @return Returns true if this HiveDecimal enforced to a NULL. + */ + private boolean earlyReadHiveDecimal() throws EOFException { + + // Since enforcing precision and scale can cause a HiveDecimal to become NULL, + // we must read it, enforce it here, and either return NULL or buffer the result. + + // These calls are to see how much data there is. The setFromBytes call below will do the same + // readVInt reads but actually unpack the decimal. + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + int saveStart = offset; + offset += tempVInt.length; + if (offset >= end) { + // Overshoot or not enough for next item. + warnBeyondEof(); + } + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + if (offset >= end) { + // Overshoot or not enough for next item. + warnBeyondEof(); + } + offset += tempVInt.value; + // Last item -- ok to be at end. 
+ if (offset > end) { + warnBeyondEof(); + } + int length = offset - saveStart; + + if (tempHiveDecimalWritable == null) { + tempHiveDecimalWritable = new HiveDecimalWritable(); + } + tempHiveDecimalWritable.setFromBytes(bytes, saveStart, length); + + saveDecimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfos[fieldIndex]; + + int precision = saveDecimalTypeInfo.getPrecision(); + int scale = saveDecimalTypeInfo.getScale(); + + saveDecimal = tempHiveDecimalWritable.getHiveDecimal(precision, scale); + + // Move past this field whether it is NULL or NOT NULL. + fieldIndex++; + + // Every 8 fields we read a new NULL byte. + if (fieldIndex < fieldCount) { + if ((fieldIndex % 8) == 0) { + // Get next null byte. + if (offset >= end) { + warnBeyondEof(); + } + nullByte = bytes[offset++]; + } + } + + // Now return whether it is NULL or NOT NULL. + return (saveDecimal == null); + } +} \ No newline at end of file Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java (working copy) @@ -0,0 +1,734 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazybinary.fast; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hive.common.util.DateUtils; + +/* + * Directly serialize, field-by-field, the LazyBinary format. +* + * This is an alternative way to serialize than what is provided by LazyBinarySerDe. 
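+ *
+ * Fields must be written in declaration order; a NULL-indicator byte is maintained for every
+ * group of 8 fields, with a bit set for each field that is NOT NULL (see the General Pattern
+ * comment below). A minimal usage sketch (field count and values are illustrative):
+ *
+ *   LazyBinarySerializeWrite write = new LazyBinarySerializeWrite(2);
+ *   write.set(output);      // output is a ByteStream.Output supplied by the caller
+ *   write.writeInt(42);
+ *   write.writeNull();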
+ */ +public class LazyBinarySerializeWrite implements SerializeWrite { + public static final Log LOG = LogFactory.getLog(LazyBinarySerializeWrite.class.getName()); + + private Output output; + + private int fieldCount; + private int fieldIndex; + private byte nullByte; + private long nullOffset; + + // For thread safety, we allocate private writable objects for our use only. + private HiveDecimalWritable hiveDecimalWritable; + private TimestampWritable timestampWritable; + private HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable; + private HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable; + private HiveIntervalDayTime hiveIntervalDayTime; + + public LazyBinarySerializeWrite(int fieldCount) { + this(); + this.fieldCount = fieldCount; + } + + // Not public since we must have the field count and other information. + private LazyBinarySerializeWrite() { + } + + /* + * Set the buffer that will receive the serialized data. + */ + @Override + public void set(Output output) { + this.output = output; + output.reset(); + fieldIndex = 0; + nullByte = 0; + nullOffset = 0; + } + + /* + * Reset the previously supplied buffer that will receive the serialized data. + */ + @Override + public void reset() { + output.reset(); + fieldIndex = 0; + nullByte = 0; + nullOffset = 0; + } + + /* + * General Pattern: + * + * // Every 8 fields we write a NULL byte. + * IF ((fieldIndex % 8) == 0), then + * IF (fieldIndex > 0), then + * Write back previous NullByte + * NullByte = 0 + * Remember write position + * Allocate room for next NULL byte. + * + * WHEN NOT NULL: Set bit in NULL byte; Write value. + * OTHERWISE NULL: We do not set a bit in the nullByte when we are writing a null. + * + * Increment fieldIndex + * + * IF (fieldIndex == fieldCount), then + * Write back final NullByte + * + */ + + /* + * Write a NULL field. + */ + @Override + public void writeNull() throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // We DO NOT set a bit in the NULL byte when we are writing a NULL. + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * BOOLEAN. + */ + @Override + public void writeBoolean(boolean v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + output.write((byte) (v ? 1 : 0)); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * BYTE. + */ + @Override + public void writeByte(byte v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. 
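+      // The byte is only reserved here; its final value is written back later via output.writeByte(nullOffset, nullByte).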
+ output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + output.write(v); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * SHORT. + */ + @Override + public void writeShort(short v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + output.write((byte) (v >> 8)); + output.write((byte) (v)); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * INT. + */ + @Override + public void writeInt(int v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeVInt(output, v); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * LONG. + */ + @Override + public void writeLong(long v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeVLong(output, v); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * FLOAT. + */ + @Override + public void writeFloat(float vf) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + int v = Float.floatToIntBits(vf); + output.write((byte) (v >> 24)); + output.write((byte) (v >> 16)); + output.write((byte) (v >> 8)); + output.write((byte) (v)); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * DOUBLE. + */ + @Override + public void writeDouble(double v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. 
+ output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeDouble(output, v); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * STRING. + * + * Can be used to write CHAR and VARCHAR when the caller takes responsibility for + * truncation/padding issues. + */ + @Override + public void writeString(byte[] v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + int length = v.length; + LazyBinaryUtils.writeVInt(output, length); + + output.write(v, 0, length); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + @Override + public void writeString(byte[] v, int start, int length) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeVInt(output, length); + + output.write(v, start, length); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * CHAR. + */ + @Override + public void writeHiveChar(HiveChar hiveChar) throws IOException { + String string = hiveChar.getStrippedValue(); + byte[] bytes = string.getBytes(); + writeString(bytes); + } + + /* + * VARCHAR. + */ + @Override + public void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException { + String string = hiveVarchar.getValue(); + byte[] bytes = string.getBytes(); + writeString(bytes); + } + + /* + * BINARY. + */ + @Override + public void writeBinary(byte[] v) throws IOException { + writeString(v); + } + + @Override + public void writeBinary(byte[] v, int start, int length) throws IOException { + writeString(v, start, length); + } + + /* + * DATE. + */ + @Override + public void writeDate(Date date) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeVInt(output, DateWritable.dateToDays(date)); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + // We provide a faster way to write a date without a Date object. + @Override + public void writeDate(int dateAsDays) throws IOException { + + // Every 8 fields we write a NULL byte. 
+ if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + LazyBinaryUtils.writeVInt(output, dateAsDays); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * TIMESTAMP. + */ + @Override + public void writeTimestamp(Timestamp v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (timestampWritable == null) { + timestampWritable = new TimestampWritable(); + } + timestampWritable.set(v); + timestampWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * INTERVAL_YEAR_MONTH. + */ + @Override + public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (hiveIntervalYearMonthWritable == null) { + hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); + } + hiveIntervalYearMonthWritable.set(viyt); + hiveIntervalYearMonthWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + @Override + public void writeHiveIntervalYearMonth(int totalMonths) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (hiveIntervalYearMonthWritable == null) { + hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); + } + hiveIntervalYearMonthWritable.set(totalMonths); + hiveIntervalYearMonthWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * INTERVAL_DAY_TIME. + */ + @Override + public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. 
+ output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (hiveIntervalDayTimeWritable == null) { + hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); + } + hiveIntervalDayTimeWritable.set(vidt); + hiveIntervalDayTimeWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + @Override + public void writeHiveIntervalDayTime(long totalNanos) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (hiveIntervalDayTime == null) { + hiveIntervalDayTime = new HiveIntervalDayTime(); + } + if (hiveIntervalDayTimeWritable == null) { + hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); + } + DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, totalNanos); + hiveIntervalDayTimeWritable.set(hiveIntervalDayTime); + hiveIntervalDayTimeWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. + output.writeByte(nullOffset, nullByte); + } + } + + /* + * DECIMAL. + */ + @Override + public void writeHiveDecimal(HiveDecimal v) throws IOException { + + // Every 8 fields we write a NULL byte. + if ((fieldIndex % 8) == 0) { + if (fieldIndex > 0) { + // Write back previous 8 field's NULL byte. + output.writeByte(nullOffset, nullByte); + nullByte = 0; + nullOffset = output.getLength(); + } + // Allocate next NULL byte. + output.reserve(1); + } + + // Set bit in NULL byte when a field is NOT NULL. + nullByte |= 1 << (fieldIndex % 8); + + if (hiveDecimalWritable == null) { + hiveDecimalWritable = new HiveDecimalWritable(); + } + hiveDecimalWritable.set(v); + hiveDecimalWritable.writeToByteStream(output); + + fieldIndex++; + + if (fieldIndex == fieldCount) { + // Write back the final NULL byte before the last fields. 
+ output.writeByte(nullOffset, nullByte); + } + } +} \ No newline at end of file Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveDecimalObjectInspector.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveDecimalObjectInspector.java (revision 1673556) +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveDecimalObjectInspector.java (working copy) @@ -29,7 +29,7 @@ public WritableHiveDecimalObjectInspector() { } - protected WritableHiveDecimalObjectInspector(DecimalTypeInfo typeInfo) { + public WritableHiveDecimalObjectInspector(DecimalTypeInfo typeInfo) { super(typeInfo); } Index: serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java (revision 1673556) +++ serde/src/test/org/apache/hadoop/hive/serde2/TestStatsSerde.java (working copy) @@ -30,7 +30,9 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; @@ -104,24 +106,11 @@ int num = 1000; Random r = new Random(1234); MyTestClass rows[] = new MyTestClass[num]; + for (int i = 0; i < num; i++) { - int randField = r.nextInt(12); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 8 ? null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - List li = randField > 10 ? null : TestBinarySortableSerDe - .getRandIntegerArray(r); - byte[] ba = TestBinarySortableSerDe.getRandBA(r, i); - MyTestClass t = new MyTestClass(b, s, n, l, f, d, st, bd, date, is, li,ba); + MyTestClass t = new MyTestClass(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); rows[i] = t; } Index: serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java (revision 0) +++ serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java (working copy) @@ -0,0 +1,373 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2; + +import java.io.IOException; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Arrays; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.Text; + +/** + * TestBinarySortableSerDe. + * + */ +public class VerifyFast { + + public static void verifyDeserializeRead(DeserializeRead deserializeRead, PrimitiveTypeInfo primitiveTypeInfo, Object object) throws IOException { + + boolean isNull; + + isNull = deserializeRead.readCheckNull(); + if (isNull) { + if (object != null) { + TestCase.fail("Field reports null but object is not null"); + } + return; + } else if (object == null) { + TestCase.fail("Field report not null but object is null"); + } + switch (primitiveTypeInfo.getPrimitiveCategory()) { + case BOOLEAN: + { + boolean value = deserializeRead.readBoolean(); + if (!(object instanceof Boolean)) { + TestCase.fail("Boolean expected object not Boolean"); + } + Boolean expected = (Boolean) object; + if (value != expected) { + TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case BYTE: + { + byte value = deserializeRead.readByte(); + if (!(object instanceof Byte)) { + TestCase.fail("Byte expected object not Byte"); + } + Byte expected = (Byte) object; + if (value != expected) { + TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")"); + } + } + break; + case SHORT: + { + short value = deserializeRead.readShort(); + if (!(object instanceof Short)) { + TestCase.fail("Short expected object not Short"); + } + Short expected = (Short) object; + if (value != expected) { + TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case INT: + { + int value = deserializeRead.readInt(); + if (!(object instanceof Integer)) { + TestCase.fail("Integer expected object not Integer"); + } + Integer expected = (Integer) object; + if (value != expected) { + TestCase.fail("Int field mismatch (expected " + expected + 
" found " + value + ")"); + } + } + break; + case LONG: + { + long value = deserializeRead.readLong(); + if (!(object instanceof Long)) { + TestCase.fail("Long expected object not Long"); + } + Long expected = (Long) object; + if (value != expected) { + TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case FLOAT: + { + float value = deserializeRead.readFloat(); + Float expected = (Float) object; + if (!(object instanceof Float)) { + TestCase.fail("Float expected object not Float"); + } + if (value != expected) { + TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case DOUBLE: + { + double value = deserializeRead.readDouble(); + Double expected = (Double) object; + if (!(object instanceof Double)) { + TestCase.fail("Double expected object not Double"); + } + if (value != expected) { + TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")"); + } + } + break; + case STRING: + { + DeserializeRead.ReadStringResults readStringResults = deserializeRead.createReadStringResults(); + deserializeRead.readString(readStringResults); + byte[] stringBytes = Arrays.copyOfRange(readStringResults.bytes, readStringResults.start, readStringResults.start + readStringResults.length); + Text text = new Text(stringBytes); + String string = text.toString(); + String expected = (String) object; + if (!string.equals(expected)) { + TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')"); + } + } + break; + case CHAR: + { + DeserializeRead.ReadHiveCharResults readHiveCharResults = deserializeRead.createReadHiveCharResults(); + deserializeRead.readHiveChar(readHiveCharResults); + HiveChar hiveChar = readHiveCharResults.getHiveChar(); + HiveChar expected = (HiveChar) object; + if (!hiveChar.equals(expected)) { + TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')"); + } + } + break; + case VARCHAR: + { + DeserializeRead.ReadHiveVarcharResults readHiveVarcharResults = deserializeRead.createReadHiveVarcharResults(); + deserializeRead.readHiveVarchar(readHiveVarcharResults); + HiveVarchar hiveVarchar = readHiveVarcharResults.getHiveVarchar(); + HiveVarchar expected = (HiveVarchar) object; + if (!hiveVarchar.equals(expected)) { + TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')"); + } + } + break; + case DECIMAL: + { + DeserializeRead.ReadDecimalResults readDecimalResults = deserializeRead.createReadDecimalResults(); + deserializeRead.readHiveDecimal(readDecimalResults); + HiveDecimal value = readDecimalResults.getHiveDecimal(); + if (value == null) { + TestCase.fail("Decimal field evaluated to NULL"); + } + HiveDecimal expected = (HiveDecimal) object; + if (!value.equals(expected)) { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; + int precision = decimalTypeInfo.getPrecision(); + int scale = decimalTypeInfo.getScale(); + TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale); + } + } + break; + case DATE: + { + DeserializeRead.ReadDateResults readDateResults = deserializeRead.createReadDateResults(); + deserializeRead.readDate(readDateResults); + Date value = readDateResults.getDate(); + Date expected = (Date) object; + if (!value.equals(expected)) { + TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + 
value.toString() + ")"); + } + } + break; + case TIMESTAMP: + { + DeserializeRead.ReadTimestampResults readTimestampResults = deserializeRead.createReadTimestampResults(); + deserializeRead.readTimestamp(readTimestampResults); + Timestamp value = readTimestampResults.getTimestamp(); + Timestamp expected = (Timestamp) object; + if (!value.equals(expected)) { + TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); + } + } + break; + case INTERVAL_YEAR_MONTH: + { + DeserializeRead.ReadIntervalYearMonthResults readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); + deserializeRead.readIntervalYearMonth(readIntervalYearMonthResults); + HiveIntervalYearMonth value = readIntervalYearMonthResults.getHiveIntervalYearMonth(); + HiveIntervalYearMonth expected = (HiveIntervalYearMonth) object; + if (!value.equals(expected)) { + TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); + } + } + break; + case INTERVAL_DAY_TIME: + { + DeserializeRead.ReadIntervalDayTimeResults readIntervalDayTimeResults = deserializeRead.createReadIntervalDayTimeResults(); + deserializeRead.readIntervalDayTime(readIntervalDayTimeResults); + HiveIntervalDayTime value = readIntervalDayTimeResults.getHiveIntervalDayTime(); + HiveIntervalDayTime expected = (HiveIntervalDayTime) object; + if (!value.equals(expected)) { + TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); + } + } + break; + case BINARY: + { + DeserializeRead.ReadBinaryResults readBinaryResults = deserializeRead.createReadBinaryResults(); + deserializeRead.readBinary(readBinaryResults); + byte[] byteArray = Arrays.copyOfRange(readBinaryResults.bytes, readBinaryResults.start, readBinaryResults.start + readBinaryResults.length); + byte[] expected = (byte[]) object; + if (byteArray.length != expected.length){ + TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + byteArray + ")"); + } + for (int b = 0; b < byteArray.length; b++) { + if (byteArray[b] != expected[b]) { + TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + byteArray + ")"); + } + } + } + break; + default: + throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory()); + } + } + + public static void serializeWrite(SerializeWrite serializeWrite, PrimitiveCategory primitiveCategory, Object object) throws IOException { + if (object == null) { + serializeWrite.writeNull(); + return; + } + switch (primitiveCategory) { + case BOOLEAN: + { + boolean value = (Boolean) object; + serializeWrite.writeBoolean(value); + } + break; + case BYTE: + { + byte value = (Byte) object; + serializeWrite.writeByte(value); + } + break; + case SHORT: + { + short value = (Short) object; + serializeWrite.writeShort(value); + } + break; + case INT: + { + int value = (Integer) object; + serializeWrite.writeInt(value); + } + break; + case LONG: + { + long value = (Long) object; + serializeWrite.writeLong(value); + } + break; + case FLOAT: + { + float value = (Float) object; + serializeWrite.writeFloat(value); + } + break; + case DOUBLE: + { + double value = (Double) object; + serializeWrite.writeDouble(value); + } + break; + case STRING: + { + String value = (String) object; + byte[] stringBytes = value.getBytes(); + int stringLength = stringBytes.length; + serializeWrite.writeString(stringBytes, 0, stringLength); + } + break; 
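One detail worth noting in the STRING case just above: value.getBytes() with no argument encodes with the JVM default charset, while Hive's Text-based string paths carry UTF-8, so a harness written from scratch would normally pin the charset explicitly. In this test the random strings stay in the ASCII range, so on typical platforms the default charset happens to produce the same bytes. A minimal sketch (class and method names are made up):

import java.nio.charset.StandardCharsets;

public class Utf8StringBytes {
  // Encode a String the way Hive's Text-based paths expect it: as UTF-8.
  public static byte[] toUtf8(String value) {
    return value.getBytes(StandardCharsets.UTF_8);
  }

  public static void main(String[] args) {
    byte[] stringBytes = toUtf8("naïve");    // 5 characters, 6 bytes in UTF-8
    System.out.println(stringBytes.length);  // prints 6
  }
}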
+ case CHAR: + { + HiveChar value = (HiveChar) object; + serializeWrite.writeHiveChar(value); + } + break; + case VARCHAR: + { + HiveVarchar value = (HiveVarchar) object; + serializeWrite.writeHiveVarchar(value); + } + break; + case DECIMAL: + { + HiveDecimal value = (HiveDecimal) object; + serializeWrite.writeHiveDecimal(value); + } + break; + case DATE: + { + Date value = (Date) object; + serializeWrite.writeDate(value); + } + break; + case TIMESTAMP: + { + Timestamp value = (Timestamp) object; + serializeWrite.writeTimestamp(value); + } + break; + case INTERVAL_YEAR_MONTH: + { + HiveIntervalYearMonth value = (HiveIntervalYearMonth) object; + serializeWrite.writeHiveIntervalYearMonth(value); + } + break; + case INTERVAL_DAY_TIME: + { + HiveIntervalDayTime value = (HiveIntervalDayTime) object; + serializeWrite.writeHiveIntervalDayTime(value); + } + break; + case BINARY: + { + byte[] binaryBytes = (byte[]) object; + int length = binaryBytes.length; + serializeWrite.writeBinary(binaryBytes, 0, length); + } + break; + default: + throw new Error("Unknown primitive category " + primitiveCategory.name()); + } + } +} \ No newline at end of file Index: serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java (revision 1673556) +++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java (working copy) @@ -185,9 +185,7 @@ } - @Test - public void canDeserializeRecords() throws SerDeException, IOException { - Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.RECORD_SCHEMA); + public void canDeserializeRecordsInternal(Schema s, Schema fileSchema) throws SerDeException, IOException { GenericData.Record record = new GenericData.Record(s); GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema()); innerRecord.put("int1", 42); @@ -196,7 +194,7 @@ record.put("aRecord", innerRecord); assertTrue(GENERIC_DATA.validate(s, record)); - AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record); + AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record, fileSchema); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); @@ -232,6 +230,19 @@ assertEquals(42432234234l, innerRecord2OI.getStructFieldData(innerRecord2, allStructFieldRefs1.get(2))); } + @Test + public void canDeserializeRecords() throws SerDeException, IOException { + Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.RECORD_SCHEMA); + canDeserializeRecordsInternal(s, s); + } + + @Test + public void canDeserializeNullableRecords() throws SerDeException, IOException { + Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.RECORD_SCHEMA); + Schema fileSchema = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.NULLABLE_RECORD_SCHEMA); + canDeserializeRecordsInternal(s, fileSchema); + } + private class ResultPair { // Because Pairs give Java the vapors. 
public final ObjectInspector oi; public final Object value; Index: serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java (revision 1673556) +++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java (working copy) @@ -100,6 +100,7 @@ " }\n" + " ]\n" + "}"; + public static final String NULLABLE_RECORD_SCHEMA = "[\"null\", " + RECORD_SCHEMA + "]"; public static final String UNION_SCHEMA = "{\n" + " \"namespace\": \"test.a.rossa\",\n" + " \"name\": \"oneUnion\",\n" + Index: serde/src/test/org/apache/hadoop/hive/serde2/avro/Utils.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/avro/Utils.java (revision 1673556) +++ serde/src/test/org/apache/hadoop/hive/serde2/avro/Utils.java (working copy) @@ -24,6 +24,7 @@ import java.io.IOException; import java.rmi.server.UID; +import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; class Utils { @@ -31,10 +32,15 @@ // chance to muck with the bytes and we're working against real Avro data. public static AvroGenericRecordWritable serializeAndDeserializeRecord(GenericData.Record record) throws IOException { + return serializeAndDeserializeRecord(record, record.getSchema()); + } + + public static AvroGenericRecordWritable + serializeAndDeserializeRecord(GenericData.Record record, Schema fileSchema) throws IOException { AvroGenericRecordWritable garw = new AvroGenericRecordWritable(record); garw.setRecordReaderID(new UID()); // Assuming file schema is the same as record schema for testing purpose. - garw.setFileSchema(record.getSchema()); + garw.setFileSchema(fileSchema); ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream daos = new DataOutputStream(baos); garw.write(daos); Index: serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java (revision 1673556) +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java (working copy) @@ -18,40 +18,82 @@ package org.apache.hadoop.hive.serde2.binarysortable; import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; import java.util.List; +import java.util.Random; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; public class MyTestClass { - Byte myByte; - Short myShort; - Integer myInt; - Long myLong; - Float myFloat; - Double myDouble; - String myString; - HiveDecimal myDecimal; - Date myDate; - MyTestInnerStruct myStruct; - List myList; - byte[] myBA; + public Boolean myBool; + public Byte myByte; + public Short myShort; + public Integer myInt; + public Long myLong; + public Float myFloat; + public Double myDouble; + public String myString; + public HiveChar myHiveChar; + public HiveVarchar myHiveVarchar; + public byte[] myBinary; + public HiveDecimal myDecimal; + public Date myDate; + public Timestamp myTimestamp; + public HiveIntervalYearMonth 
myIntervalYearMonth; + public HiveIntervalDayTime myIntervalDayTime; + + // Add more complex types. + public MyTestInnerStruct myStruct; + public List myList; + public MyTestClass() { } - public MyTestClass(Byte b, Short s, Integer i, Long l, Float f, Double d, - String st, HiveDecimal bd, Date date, MyTestInnerStruct is, List li, byte[] ba) { - myByte = b; - myShort = s; - myInt = i; - myLong = l; - myFloat = f; - myDouble = d; - myString = st; - myDecimal = bd; - myDate = date; - myStruct = is; - myList = li; - myBA = ba; + public final static int fieldCount = 18; + + public int randomFill(Random r, ExtraTypeInfo extraTypeInfo) { + int randField = r.nextInt(MyTestClass.fieldCount); + int field = 0; + + myBool = (randField == field++) ? null : (r.nextInt(1) == 1); + myByte = (randField == field++) ? null : Byte.valueOf((byte) r.nextInt()); + myShort = (randField == field++) ? null : Short.valueOf((short) r.nextInt()); + myInt = (randField == field++) ? null : Integer.valueOf(r.nextInt()); + myLong = (randField == field++) ? null : Long.valueOf(r.nextLong()); + myFloat = (randField == field++) ? null : Float + .valueOf(r.nextFloat() * 10 - 5); + myDouble = (randField == field++) ? null : Double + .valueOf(r.nextDouble() * 10 - 5); + myString = (randField == field++) ? null : MyTestPrimitiveClass.getRandString(r); + myHiveChar = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveChar(r, extraTypeInfo); + myHiveVarchar = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveVarchar(r, extraTypeInfo); + myBinary = MyTestPrimitiveClass.getRandBinary(r, r.nextInt(1000)); + myDecimal = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveDecimal(r, extraTypeInfo); + myDate = (randField == field++) ? null : MyTestPrimitiveClass.getRandDate(r); + myTimestamp = (randField == field++) ? null : MyTestPrimitiveClass.getRandTimestamp(r); + myIntervalYearMonth = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalYearMonth(r); + myIntervalDayTime = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalDayTime(r); + + myStruct = (randField == field++) ? null : new MyTestInnerStruct( + r.nextInt(5) - 2, r.nextInt(5) - 2); + myList = (randField == field++) ? null : getRandIntegerArray(r); + return field; } + + public static List getRandIntegerArray(Random r) { + int length = r.nextInt(10); + ArrayList result = new ArrayList(length); + for (int i = 0; i < length; i++) { + result.add(r.nextInt(128)); + } + return result; + } + } Index: serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestPrimitiveClass.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestPrimitiveClass.java (revision 0) +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestPrimitiveClass.java (working copy) @@ -0,0 +1,453 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.binarysortable; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.type.HiveBaseChar; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.Writable; +import org.apache.hive.common.util.DateUtils; + +// Just the primitive types. +public class MyTestPrimitiveClass { + + public Boolean myBool; + public Byte myByte; + public Short myShort; + public Integer myInt; + public Long myLong; + public Float myFloat; + public Double myDouble; + public String myString; + public HiveChar myHiveChar; + public HiveVarchar myHiveVarchar; + public byte[] myBinary; + public HiveDecimal myDecimal; + public Date myDate; + public Timestamp myTimestamp; + public HiveIntervalYearMonth myIntervalYearMonth; + public HiveIntervalDayTime myIntervalDayTime; + + public MyTestPrimitiveClass() { + } + + public final static int primitiveCount = 16; + + public int randomFill(Random r, ExtraTypeInfo extraTypeInfo) { + int randField = r.nextInt(primitiveCount); + int field = 0; + return randomFill(r, randField, field, extraTypeInfo); + } + + public boolean chooseNull(Random r, int randField, int field) { + if (randField == field) { + return true; + } + return (r.nextInt(5) == 0); + } + + public int randomFill(Random r, int randField, int field, ExtraTypeInfo extraTypeInfo) { + myBool = chooseNull(r, randField, field++) ? null : Boolean.valueOf(r.nextInt(1) == 1); + myByte = chooseNull(r, randField, field++) ? 
null : Byte.valueOf((byte) r.nextInt()); + myShort = chooseNull(r, randField, field++) ? null : Short.valueOf((short) r.nextInt()); + myInt = chooseNull(r, randField, field++) ? null : Integer.valueOf(r.nextInt()); + myLong = chooseNull(r, randField, field++) ? null : Long.valueOf(r.nextLong()); + myFloat = chooseNull(r, randField, field++) ? null : Float + .valueOf(r.nextFloat() * 10 - 5); + myDouble = chooseNull(r, randField, field++) ? null : Double + .valueOf(r.nextDouble() * 10 - 5); + myString = chooseNull(r, randField, field++) ? null : getRandString(r); + myHiveChar = chooseNull(r, randField, field++) ? null : getRandHiveChar(r, extraTypeInfo); + myHiveVarchar = chooseNull(r, randField, field++) ? null : getRandHiveVarchar(r, extraTypeInfo); + myBinary = getRandBinary(r, r.nextInt(1000)); + myDecimal = chooseNull(r, randField, field++) ? null : getRandHiveDecimal(r, extraTypeInfo); + myDate = chooseNull(r, randField, field++) ? null : getRandDate(r); + myTimestamp = chooseNull(r, randField, field++) ? null : getRandTimestamp(r); + myIntervalYearMonth = chooseNull(r, randField, field++) ? null : getRandIntervalYearMonth(r); + myIntervalDayTime = chooseNull(r, randField, field++) ? null : getRandIntervalDayTime(r); + return field; + } + + public static class ExtraTypeInfo { + public int hiveCharMaxLength; + public int hiveVarcharMaxLength; + public int precision; + public int scale; + + public ExtraTypeInfo() { + // For NULL fields, make up a valid max length. + hiveCharMaxLength = 1; + hiveVarcharMaxLength = 1; + precision = HiveDecimal.SYSTEM_DEFAULT_PRECISION; + scale = HiveDecimal.SYSTEM_DEFAULT_SCALE; + } + } + + public static PrimitiveTypeInfo[] getPrimitiveTypeInfos(ExtraTypeInfo extraTypeInfo) { + PrimitiveTypeInfo[] primitiveTypeInfos = new PrimitiveTypeInfo[primitiveCount]; + for (int i = 0; i < primitiveCount; i++) { + primitiveTypeInfos[i] = getPrimitiveTypeInfo(i, extraTypeInfo); + } + return primitiveTypeInfos; + } + + public static String getRandString(Random r) { + return getRandString(r, null, r.nextInt(10)); + } + + public static String getRandString(Random r, String characters, int length) { + if (characters == null) { + characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + } + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < length; i++) { + if (characters == null) { + sb.append((char) (r.nextInt(128))); + } else { + sb.append(characters.charAt(r.nextInt(characters.length()))); + } + } + return sb.toString(); + } + + public static HiveChar getRandHiveChar(Random r, ExtraTypeInfo extraTypeInfo) { + int maxLength = 10 + r.nextInt(60); + extraTypeInfo.hiveCharMaxLength = maxLength; + String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveChar hiveChar = new HiveChar(randomString, maxLength); + return hiveChar; + } + + public static HiveVarchar getRandHiveVarchar(Random r, ExtraTypeInfo extraTypeInfo) { + int maxLength = 10 + r.nextInt(60); + extraTypeInfo.hiveVarcharMaxLength = maxLength; + String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength); + return hiveVarchar; + } + + public static byte[] getRandBinary(Random r, int len){ + byte[] bytes = new byte[len]; + for (int j = 0; j < len; j++){ + bytes[j] = Byte.valueOf((byte) r.nextInt()); + } + return bytes; + } + + private static final String DECIMAL_CHARS = "0123456789"; + + public static HiveDecimal getRandHiveDecimal(Random r, ExtraTypeInfo extraTypeInfo) { + while (true) { + StringBuilder sb = 
new StringBuilder(); + int precision = 1 + r.nextInt(18); + int scale = 0 + r.nextInt(precision + 1); + + int integerDigits = precision - scale; + + if (r.nextBoolean()) { + sb.append("-"); + } + + if (integerDigits == 0) { + sb.append("0"); + } else { + sb.append(getRandString(r, DECIMAL_CHARS, integerDigits)); + } + if (scale != 0) { + sb.append("."); + sb.append(getRandString(r, DECIMAL_CHARS, scale)); + } + + HiveDecimal bd = HiveDecimal.create(sb.toString()); + extraTypeInfo.precision = bd.precision(); + extraTypeInfo.scale = bd.scale(); + if (extraTypeInfo.scale > extraTypeInfo.precision) { + // Sometimes weird decimals are produced? + continue; + } + + // For now, punt. + extraTypeInfo.precision = HiveDecimal.SYSTEM_DEFAULT_PRECISION; + extraTypeInfo.scale = HiveDecimal.SYSTEM_DEFAULT_SCALE; + return bd; + } + } + + public static Date getRandDate(Random r) { + String dateStr = String.format("%d-%02d-%02d", + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(1 + r.nextInt(12)), // month + Integer.valueOf(1 + r.nextInt(28))); // day + Date dateVal = Date.valueOf(dateStr); + return dateVal; + } + + public static Timestamp getRandTimestamp(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); + } + String timestampStr = String.format("%d-%02d-%02d %02d:%02d:%02d%s", + Integer.valueOf(1970 + r.nextInt(200)), // year + Integer.valueOf(1 + r.nextInt(12)), // month + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + Timestamp timestampVal = Timestamp.valueOf(timestampStr); + return timestampVal; + } + + public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) { + String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-"; + String intervalYearMonthStr = String.format("%s%d-%d", + yearMonthSignStr, + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(0 + r.nextInt(12))); // month + HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr); + TestCase.assertTrue(intervalYearMonthVal != null); + return intervalYearMonthVal; + } + + public static HiveIntervalDayTime getRandIntervalDayTime(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); + } + String yearMonthSignStr = r.nextInt(2) == 0 ? 
"" : "-"; + String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s", + yearMonthSignStr, + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr); + TestCase.assertTrue(intervalDayTimeVal != null); + return intervalDayTimeVal; + } + + public Object getPrimitiveObject(int index) { + int field = 0; + if (index == field++) { + return myBool; + } else if (index == field++) { + return myByte; + } else if (index == field++) { + return myShort; + } else if (index == field++) { + return myInt; + } else if (index == field++) { + return myLong; + } else if (index == field++) { + return myFloat; + } else if (index == field++) { + return myDouble; + } else if (index == field++) { + return myString; + } else if (index == field++) { + return myHiveChar; + } else if (index == field++) { + return myHiveVarchar; + } else if (index == field++) { + return myBinary; + } else if (index == field++) { + return myDecimal; + } else if (index == field++) { + return myDate; + } else if (index == field++) { + return myTimestamp; + } else if (index == field++) { + return myIntervalYearMonth; + } else if (index == field++) { + return myIntervalDayTime; + } else { + throw new Error("Field " + " field not handled"); + } + } + + public Object getPrimitiveWritableObject(int index, PrimitiveTypeInfo primitiveTypeInfo) { + int field = 0; + if (index == field++) { + return (myBool == null ? null : PrimitiveObjectInspectorFactory.writableBooleanObjectInspector.create((boolean) myBool)); + } else if (index == field++) { + return (myByte == null ? null : PrimitiveObjectInspectorFactory.writableByteObjectInspector.create((byte) myByte)); + } else if (index == field++) { + return (myShort == null ? null : PrimitiveObjectInspectorFactory.writableShortObjectInspector.create((short) myShort)); + } else if (index == field++) { + return (myInt == null ? null : PrimitiveObjectInspectorFactory.writableIntObjectInspector.create((int) myInt)); + } else if (index == field++) { + return (myLong == null ? null : PrimitiveObjectInspectorFactory.writableLongObjectInspector.create((long) myLong)); + } else if (index == field++) { + return (myFloat == null ? null : PrimitiveObjectInspectorFactory.writableFloatObjectInspector.create((float) myFloat)); + } else if (index == field++) { + return (myDouble == null ? null : PrimitiveObjectInspectorFactory.writableDoubleObjectInspector.create((double) myDouble)); + } else if (index == field++) { + return (myString == null ? null : PrimitiveObjectInspectorFactory.writableStringObjectInspector.create(myString)); + } else if (index == field++) { + if (myHiveChar == null) { + return null; + } + CharTypeInfo charTypeInfo = (CharTypeInfo) primitiveTypeInfo; + WritableHiveCharObjectInspector writableCharObjectInspector = new WritableHiveCharObjectInspector(charTypeInfo); + return writableCharObjectInspector.create(myHiveChar); + } else if (index == field++) { + if (myHiveVarchar == null) { + return null; + } + VarcharTypeInfo varcharTypeInfo = (VarcharTypeInfo) primitiveTypeInfo; + WritableHiveVarcharObjectInspector writableVarcharObjectInspector = new WritableHiveVarcharObjectInspector(varcharTypeInfo); + return writableVarcharObjectInspector.create(myHiveVarchar); + } else if (index == field++) { + return (myBinary == null ? 
null : PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(myBinary)); + } else if (index == field++) { + if (myDecimal == null) { + return null; + } + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; + WritableHiveDecimalObjectInspector writableDecimalObjectInspector = new WritableHiveDecimalObjectInspector(decimalTypeInfo); + return writableDecimalObjectInspector.create(myDecimal); + } else if (index == field++) { + return (myDate == null ? null : PrimitiveObjectInspectorFactory.writableDateObjectInspector.create(myDate)); + } else if (index == field++) { + return (myTimestamp == null ? null : PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.create(myTimestamp)); + } else if (index == field++) { + return (myIntervalYearMonth == null ? null : PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.create(myIntervalYearMonth)); + } else if (index == field++) { + return (myIntervalDayTime == null ? null : PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.create(myIntervalDayTime)); + } else { + throw new Error("Field " + " field not handled"); + } + } + + + public static PrimitiveCategory getPrimitiveCategory(int index) { + int field = 0; + if (index == field++) { + return PrimitiveCategory.BOOLEAN; + } else if (index == field++) { + return PrimitiveCategory.BYTE; + } else if (index == field++) { + return PrimitiveCategory.SHORT; + } else if (index == field++) { + return PrimitiveCategory.INT; + } else if (index == field++) { + return PrimitiveCategory.LONG; + } else if (index == field++) { + return PrimitiveCategory.FLOAT; + } else if (index == field++) { + return PrimitiveCategory.DOUBLE; + } else if (index == field++) { + return PrimitiveCategory.STRING; + } else if (index == field++) { + return PrimitiveCategory.CHAR; + } else if (index == field++) { + return PrimitiveCategory.VARCHAR; + } else if (index == field++) { + return PrimitiveCategory.BINARY; + } else if (index == field++) { + return PrimitiveCategory.DECIMAL; + } else if (index == field++) { + return PrimitiveCategory.DATE; + } else if (index == field++) { + return PrimitiveCategory.TIMESTAMP; + } else if (index == field++) { + return PrimitiveCategory.INTERVAL_YEAR_MONTH; + } else if (index == field++) { + return PrimitiveCategory.INTERVAL_DAY_TIME; + } else { + throw new Error("Field " + " field not handled"); + } + } + + public static PrimitiveTypeInfo getPrimitiveTypeInfo(int index, ExtraTypeInfo extraTypeInfo) { + PrimitiveCategory primitiveCategory = getPrimitiveCategory(index); + String typeName; + switch (primitiveCategory) { + case BYTE: + typeName = "tinyint"; + break; + case SHORT: + typeName = "smallint"; + break; + case LONG: + typeName = "bigint"; + break; + case CHAR: + typeName = String.format("char(%d)", extraTypeInfo.hiveCharMaxLength); + break; + case VARCHAR: + typeName = String.format("varchar(%d)", extraTypeInfo.hiveVarcharMaxLength); + break; + case DECIMAL: + typeName = String.format("decimal(%d,%d)", extraTypeInfo.precision, extraTypeInfo.scale); + break; + default: + // No type name difference or adornment. 
+ typeName = primitiveCategory.name().toLowerCase(); + break; + } + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + return primitiveTypeInfo; + } + + public StructObjectInspector getRowInspector(PrimitiveTypeInfo[] primitiveTypeInfos) { + List columnNames = new ArrayList(primitiveCount); + List primitiveObjectInspectorList = new ArrayList(primitiveCount); + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + columnNames.add(String.format("col%d", index)); + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveCategory)); + } + StandardStructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList); + return rowOI; + } +} Index: serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java (revision 0) +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java (working copy) @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.binarysortable; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.VerifyFast; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.io.BytesWritable; + +public class TestBinarySortableFast extends TestCase { + + private void testBinarySortableFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, + boolean[] columnSortOrderIsDesc, SerDe serde, StructObjectInspector rowOI, boolean ascending, + Map primitiveTypeInfoMap) throws Throwable { + + BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc); + + // Try to serialize + + // One Writable per row. + BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; + + int[][] perFieldWriteLengthsArray = new int[myTestPrimitiveClasses.length][]; + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + Output output = new Output(); + binarySortableSerializeWrite.set(output); + + int[] perFieldWriteLengths = new int[MyTestPrimitiveClass.primitiveCount]; + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); + VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveCategory, object); + perFieldWriteLengths[index] = output.getLength(); + } + perFieldWriteLengthsArray[i] = perFieldWriteLengths; + + BytesWritable bytesWritable = new BytesWritable(); + bytesWritable.set(output.getData(), 0, output.getLength()); + serializeWriteBytes[i] = bytesWritable; + if (i > 0) { + int compareResult = serializeWriteBytes[i - 1].compareTo(serializeWriteBytes[i]); + if ((compareResult < 0 && !ascending) + || (compareResult > 0 && ascending)) { + System.out.println("Test failed in " + + (ascending ? "ascending" : "descending") + " order with " + + (i - 1) + " and " + i); + System.out.println("serialized data [" + (i - 1) + "] = " + + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1])); + System.out.println("serialized data [" + i + "] = " + + TestBinarySortableSerDe.hexString(serializeWriteBytes[i])); + fail("Sort order of serialized " + (i - 1) + " and " + i + + " are reversed!"); + } + } + } + + + // Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite. 
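The loop that follows performs that check through VerifyFast.verifyDeserializeRead; the DeserializeRead contract it relies on is positional: for each field, in declaration order, call readCheckNull() first and only when it returns false call the matching typed reader. A compressed sketch of that pattern for a hypothetical two-column row (an INT then a STRING); the class, method, and field layout are assumed for illustration, and the caller is expected to have positioned the reader with reader.set(bytes, 0, length).

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.hive.serde2.fast.DeserializeRead;

public class ReadTwoFieldsSketch {
  static void readIntThenString(DeserializeRead reader) throws IOException {
    if (!reader.readCheckNull()) {
      int first = reader.readInt();                       // field 0: INT
      System.out.println("int field = " + first);
    }
    if (!reader.readCheckNull()) {
      DeserializeRead.ReadStringResults results = reader.createReadStringResults();
      reader.readString(results);                         // field 1: STRING
      byte[] copy = Arrays.copyOfRange(results.bytes, results.start,
          results.start + results.length);
      System.out.println("string bytes = " + copy.length);
    }
    reader.extraFieldsCheck();                            // all declared fields consumed
  }
}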
+ for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t); + BinarySortableDeserializeRead binarySortableDeserializeRead = + new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc); + + BytesWritable bytesWritable = serializeWriteBytes[i]; + binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], object); + } + binarySortableDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned()); + } + + // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite. + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + BytesWritable bytesWritable = serializeWriteBytes[i]; + List deserializedRow = (List) serde.deserialize(bytesWritable); + + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t); + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); + Object object = deserializedRow.get(index); + if (expected == null || object == null) { + if (expected != null || object != null) { + fail("SerDe deserialized NULL column mismatch"); + } + } else { + if (!object.equals(expected)) { + fail("SerDe deserialized value does not match"); + } + } + } + } + + // One Writable per row. + BytesWritable serdeBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; + + // Serialize using the SerDe, then below deserialize using DeserializeRead. + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + + // Since SerDe reuses memory, we will need to make a copy. 
+ BytesWritable serialized = (BytesWritable) serde.serialize(t, rowOI); + BytesWritable bytesWritable = new BytesWritable(); + bytesWritable.set(serialized); + byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + byte[] serializeWriteExpected = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength()); + if (!Arrays.equals(serDeOutput, serializeWriteExpected)) { + int mismatchPos = -1; + if (serDeOutput.length != serializeWriteExpected.length) { + for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) { + if (serDeOutput[b] != serializeWriteExpected[b]) { + mismatchPos = b; + break; + } + } + fail("Different byte array lengths: serDeOutput.length " + serDeOutput.length + ", serializeWriteExpected.length " + serializeWriteExpected.length + + " mismatchPos " + mismatchPos + " perFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i])); + } + for (int b = 0; b < serDeOutput.length; b++) { + if (serDeOutput[b] != serializeWriteExpected[b]) { + fail("SerializeWrite and SerDe serialization does not match at position " + b); + } + } + } + serdeBytes[i] = bytesWritable; + } + + // Try to deserialize using DeserializeRead our Writable row objects created by SerDe. + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t); + BinarySortableDeserializeRead binarySortableDeserializeRead = + new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc); + + BytesWritable bytesWritable = serdeBytes[i]; + binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], object); + } + binarySortableDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned()); + } + } + + public void testBinarySortableFast() throws Throwable { + try { + + int num = 1000; + Random r = new Random(1234); + MyTestPrimitiveClass myTestPrimitiveClasses[] = new MyTestPrimitiveClass[num]; + // Need a map because we sort. 
+ Map primitiveTypeInfoMap = new HashMap(); + + for (int i = 0; i < num; i++) { + int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount); + MyTestPrimitiveClass t = new MyTestPrimitiveClass(); + int field = 0; + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, randField, field, extraTypeInfo); + myTestPrimitiveClasses[i] = t; + PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); + primitiveTypeInfoMap.put(t, primitiveTypeInfos); + } + + StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory + .getReflectionObjectInspector(MyTestPrimitiveClass.class, + ObjectInspectorOptions.JAVA); + + TestBinarySortableSerDe.sort(myTestPrimitiveClasses, rowOI); + + String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + String order; + order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '+'); + SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order); + order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '-'); + SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order); + + boolean[] columnSortOrderIsDesc = new boolean[MyTestPrimitiveClass.primitiveCount]; + Arrays.fill(columnSortOrderIsDesc, false); + testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, serde_ascending, rowOI, true, primitiveTypeInfoMap); + Arrays.fill(columnSortOrderIsDesc, true); + testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, serde_descending, rowOI, false, primitiveTypeInfoMap); + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +} \ No newline at end of file Index: serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java (revision 1673556) +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java (working copy) @@ -26,11 +26,13 @@ import junit.framework.TestCase; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; @@ -64,7 +66,7 @@ return sb.toString(); } - private SerDe getSerDe(String fieldNames, String fieldTypes, String order) + public static SerDe getSerDe(String fieldNames, String fieldTypes, String order) throws Throwable { Properties schema = new Properties(); schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames); @@ -124,7 +126,7 @@ } } - private void sort(Object[] structs, ObjectInspector oi) { + public static void sort(Object[] structs, ObjectInspector oi) { for (int i = 0; i < structs.length; i++) { for (int j = i + 1; j < structs.length; j++) { if (ObjectInspectorUtils.compare(structs[i], oi, structs[j], oi) > 0) { @@ -136,66 +138,6 @@ } } - public static HiveDecimal getRandHiveDecimal(Random r) { - 
StringBuilder sb = new StringBuilder(); - int l1 = 1+r.nextInt(18), l2 = r.nextInt(19); - - if (r.nextBoolean()) { - sb.append("-"); - } - - sb.append(getRandString(r, DECIMAL_CHARS, l1)); - if (l2 != 0) { - sb.append("."); - sb.append(getRandString(r, DECIMAL_CHARS, l2)); - } - - HiveDecimal bd = HiveDecimal.create(sb.toString()); - return bd; - } - - public static Date getRandDate(Random r) { - String dateStr = String.format("%d-%02d-%02d", - Integer.valueOf(1800 + r.nextInt(500)), // year - Integer.valueOf(1 + r.nextInt(12)), // month - Integer.valueOf(1 + r.nextInt(28))); // day - Date dateVal = Date.valueOf(dateStr); - return dateVal; - } - - public static String getRandString(Random r) { - return getRandString(r, null, r.nextInt(10)); - } - - public static String getRandString(Random r, String characters, int length) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < length; i++) { - if (characters == null) { - sb.append((char) (r.nextInt(128))); - } else { - sb.append(characters.charAt(r.nextInt(characters.length()))); - } - } - return sb.toString(); - } - - public static List getRandIntegerArray(Random r) { - int length = r.nextInt(10); - ArrayList result = new ArrayList(length); - for (int i = 0; i < length; i++) { - result.add(r.nextInt(128)); - } - return result; - } - - public static byte[] getRandBA(Random r, int len){ - byte[] bytes = new byte[len]; - for (int j = 0; j < len; j++){ - bytes[j] = Byte.valueOf((byte) r.nextInt()); - } - return bytes; - } - public void testBinarySortableSerDe() throws Throwable { try { @@ -206,23 +148,9 @@ MyTestClass rows[] = new MyTestClass[num]; for (int i = 0; i < num; i++) { - int randField = r.nextInt(11); MyTestClass t = new MyTestClass(); - t.myByte = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - t.myShort = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - t.myInt = randField > 2 ? null : Integer.valueOf(r.nextInt()); - t.myLong = randField > 3 ? null : Long.valueOf(r.nextLong()); - t.myFloat = randField > 4 ? null : Float - .valueOf(r.nextFloat() * 10 - 5); - t.myDouble = randField > 5 ? null : Double - .valueOf(r.nextDouble() * 10 - 5); - t.myString = randField > 6 ? null : getRandString(r); - t.myDecimal = randField > 7 ? null : getRandHiveDecimal(r); - t.myDate = randField > 8 ? null : getRandDate(r); - t.myStruct = randField > 9 ? null : new MyTestInnerStruct( - r.nextInt(5) - 2, r.nextInt(5) - 2); - t.myList = randField > 10 ? 
null : getRandIntegerArray(r); - t.myBA = getRandBA(r, i); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); rows[i] = t; } @@ -234,10 +162,13 @@ String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + String order; + order = StringUtils.leftPad("", MyTestClass.fieldCount, '+'); testBinarySortableSerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes, - "++++++++++++"), true); + order), true); + order = StringUtils.leftPad("", MyTestClass.fieldCount, '-'); testBinarySortableSerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes, - "------------"), false); + order), false); System.out.println("Test testTBinarySortableProtocol passed!"); } catch (Throwable e) { Index: serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java (revision 0) +++ serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java (working copy) @@ -0,0 +1,261 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.lazy; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.VerifyFast; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; +import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; + +public class TestLazySimpleFast extends TestCase { + + private void testLazySimpleFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, LazySimpleSerDe[] serdes, + StructObjectInspector[] rowOIs, byte separator, LazySerDeParameters[] serdeParams, + PrimitiveTypeInfo[][] primitiveTypeInfosArray) throws Throwable { + + + // Try to serialize + BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + Output output = new Output(); + + LazySimpleSerializeWrite lazySimpleSerializeWrite = + new LazySimpleSerializeWrite(MyTestPrimitiveClass.primitiveCount, + separator, serdeParams[i]); + + lazySimpleSerializeWrite.set(output); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); + VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveCategory, object); + } + + BytesWritable bytesWritable = new BytesWritable(); + bytesWritable.set(output.getData(), 0, output.getLength()); + serializeWriteBytes[i] = bytesWritable; + } + + // Try to deserialize + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + LazySimpleDeserializeRead lazySimpleDeserializeRead = + new LazySimpleDeserializeRead(primitiveTypeInfos, + separator, serdeParams[i]); + + BytesWritable bytesWritable = serializeWriteBytes[i]; + byte[] bytes = bytesWritable.getBytes(); + int 
length = bytesWritable.getLength(); + lazySimpleDeserializeRead.set(bytes, 0, length); + + char[] chars = new char[length]; + for (int c = 0; c < chars.length; c++) { + chars[c] = (char) (bytes[c] & 0xFF); + } + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); + VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], object); + } + lazySimpleDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned()); + } + + // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite. + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + BytesWritable bytesWritable = serializeWriteBytes[i]; + LazyStruct lazySimpleStruct = (LazyStruct) serdes[i].deserialize(bytesWritable); + + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; + Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfo); + LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index); + Object object; + if (lazyPrimitive != null) { + object = lazyPrimitive.getWritableObject(); + } else { + object = null; + } + if (expected == null || object == null) { + if (expected != null || object != null) { + fail("SerDe deserialized NULL column mismatch"); + } + } else { + if (!object.equals(expected)) { + fail("SerDe deserialized value does not match"); + } + } + } + } + + // One Writable per row. + byte[][] serdeBytes = new byte[myTestPrimitiveClasses.length][]; + + // Serialize using the SerDe, then below deserialize using DeserializeRead. + Object[] row = new Object[MyTestPrimitiveClass.primitiveCount]; + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + + // LazySimple seems to work better with a row object array instead of a Java object... + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); + row[index] = object; + } + + Text serialized = (Text) serdes[i].serialize(row, rowOIs[i]); + byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()); + + byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength()); + if (!Arrays.equals(bytes1, bytes2)) { + fail("SerializeWrite and SerDe serialization does not match"); + } + serdeBytes[i] = serialized.copyBytes(); + } + + // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
+ for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + LazySimpleDeserializeRead lazySimpleDeserializeRead = + new LazySimpleDeserializeRead(primitiveTypeInfos, + separator, serdeParams[i]); + + byte[] bytes = serdeBytes[i]; + lazySimpleDeserializeRead.set(bytes, 0, bytes.length); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], object); + } + lazySimpleDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned()); + } + } + + private Properties createProperties(String fieldNames, String fieldTypes) { + Properties tbl = new Properties(); + + // Set the configuration parameters + tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); + + tbl.setProperty("columns", fieldNames); + tbl.setProperty("columns.types", fieldTypes); + + tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); + + return tbl; + } + + private LazySimpleSerDe getSerDe(String fieldNames, String fieldTypes) throws SerDeException { + // Create the SerDe + LazySimpleSerDe serDe = new LazySimpleSerDe(); + Configuration conf = new Configuration(); + Properties tbl = createProperties(fieldNames, fieldTypes); + SerDeUtils.initializeSerDe(serDe, conf, tbl, null); + return serDe; + } + + private LazySerDeParameters getSerDeParams(String fieldNames, String fieldTypes) throws SerDeException { + Configuration conf = new Configuration(); + Properties tbl = createProperties(fieldNames, fieldTypes); + return new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); + } + + public void testLazySimpleFast() throws Throwable { + try { + + int num = 1000; + Random r = new Random(1234); + MyTestPrimitiveClass[] rows = new MyTestPrimitiveClass[num]; + PrimitiveTypeInfo[][] primitiveTypeInfosArray = new PrimitiveTypeInfo[num][]; + for (int i = 0; i < num; i++) { + int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount); + MyTestPrimitiveClass t = new MyTestPrimitiveClass(); + int field = 0; + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, randField, field, extraTypeInfo); + PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); + rows[i] = t; + primitiveTypeInfosArray[i] = primitiveTypeInfos; + } + + // To get the specific type information for CHAR and VARCHAR, seems like we need an + // inspector and SerDe per row... 
+ StructObjectInspector[] rowOIs = new StructObjectInspector[num]; + LazySimpleSerDe[] serdes = new LazySimpleSerDe[num]; + LazySerDeParameters[] serdeParams = new LazySerDeParameters[num]; + for (int i = 0; i < num; i++) { + MyTestPrimitiveClass t = rows[i]; + + StructObjectInspector rowOI = t.getRowInspector(primitiveTypeInfosArray[i]); + + String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + + rowOIs[i] = rowOI; + serdes[i] = getSerDe(fieldNames, fieldTypes); + serdeParams[i] = getSerDeParams(fieldNames, fieldTypes); + } + + byte separator = (byte) '\t'; + testLazySimpleFast(rows, serdes, rowOIs, separator, serdeParams, primitiveTypeInfosArray); + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +} \ No newline at end of file Index: serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java (revision 1673556) +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java (working copy) @@ -18,49 +18,119 @@ package org.apache.hadoop.hive.serde2.lazybinary; import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; +import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; /** * MyTestClassBigger. * */ public class MyTestClassBigger { - Byte myByte; - Short myShort; - Integer myInt; - Long myLong; - Float myFloat; - Double myDouble; - String myString; - HiveDecimal myDecimal; - Date myDate; - MyTestInnerStruct myStruct; - List myList; - byte[] myBA; + + // The primitives. + public Boolean myBool; + public Byte myByte; + public Short myShort; + public Integer myInt; + public Long myLong; + public Float myFloat; + public Double myDouble; + public String myString; + public HiveChar myHiveChar; + public HiveVarchar myHiveVarchar; + public byte[] myBinary; + public HiveDecimal myDecimal; + public Date myDate; + public Timestamp myTimestamp; + public HiveIntervalYearMonth myIntervalYearMonth; + public HiveIntervalDayTime myIntervalDayTime; + + + // Add more complex types. + public MyTestInnerStruct myStruct; + public List myList; + + // Bigger addition. 
Map> myMap; + public final static int mapPos = 18; + public MyTestClassBigger() { } - public MyTestClassBigger(Byte b, Short s, Integer i, Long l, Float f, - Double d, String st, HiveDecimal bd, Date date, MyTestInnerStruct is, List li, - byte[] ba, Map> mp) { - myByte = b; - myShort = s; - myInt = i; - myLong = l; - myFloat = f; - myDouble = d; - myString = st; - myDecimal = bd; - myDate = date; - myStruct = is; - myList = li; - myBA = ba; - myMap = mp; + public final static int biggerCount = 19; + + public int randomFill(Random r, ExtraTypeInfo extraTypeInfo) { + int randField = r.nextInt(biggerCount); + int field = 0; + myBool = (randField == field++) ? null : (r.nextInt(1) == 1); + myByte = (randField == field++) ? null : Byte.valueOf((byte) r.nextInt()); + myShort = (randField == field++) ? null : Short.valueOf((short) r.nextInt()); + myInt = (randField == field++) ? null : Integer.valueOf(r.nextInt()); + myLong = (randField == field++) ? null : Long.valueOf(r.nextLong()); + myFloat = (randField == field++) ? null : Float + .valueOf(r.nextFloat() * 10 - 5); + myDouble = (randField == field++) ? null : Double + .valueOf(r.nextDouble() * 10 - 5); + myString = (randField == field++) ? null : MyTestPrimitiveClass.getRandString(r); + myHiveChar = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveChar(r, extraTypeInfo); + myHiveVarchar = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveVarchar(r, extraTypeInfo); + myBinary = MyTestPrimitiveClass.getRandBinary(r, r.nextInt(1000)); + myDecimal = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveDecimal(r, extraTypeInfo); + myDate = (randField == field++) ? null : MyTestPrimitiveClass.getRandDate(r); + myTimestamp = (randField == field++) ? null : MyTestPrimitiveClass.getRandTimestamp(r); + myIntervalYearMonth = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalYearMonth(r); + myIntervalDayTime = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalDayTime(r); + + myStruct = (randField == field++) ? null : new MyTestInnerStruct( + r.nextInt(5) - 2, r.nextInt(5) - 2); + myList = (randField == field++) ? null : MyTestClass.getRandIntegerArray(r); + + Map> mp = new HashMap>(); + String key = MyTestPrimitiveClass.getRandString(r); + List value = randField > 9 ? null + : getRandStructArray(r); + mp.put(key, value); + String key1 = MyTestPrimitiveClass.getRandString(r); + mp.put(key1, null); + String key2 = MyTestPrimitiveClass.getRandString(r); + List value2 = getRandStructArray(r); + mp.put(key2, value2); + myMap = mp; + return field; } + + /** + * Generate a random struct array. 
+ * + * @param r + * random number generator + * @return a struct array + */ + static List getRandStructArray(Random r) { + int length = r.nextInt(10); + ArrayList result = new ArrayList( + length); + for (int i = 0; i < length; i++) { + MyTestInnerStruct ti = new MyTestInnerStruct(r.nextInt(), r.nextInt()); + result.add(ti); + } + return result; + } + } Index: serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java (revision 1673556) +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java (working copy) @@ -18,36 +18,68 @@ package org.apache.hadoop.hive.serde2.lazybinary; import java.sql.Date; +import java.sql.Timestamp; +import java.util.Random; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; public class MyTestClassSmaller { - Byte myByte; - Short myShort; - Integer myInt; - Long myLong; - Float myFloat; - Double myDouble; - String myString; - HiveDecimal myDecimal; - Date myDate; + + public Boolean myBool; + public Byte myByte; + public Short myShort; + public Integer myInt; + public Long myLong; + public Float myFloat; + public Double myDouble; + public String myString; + public HiveChar myHiveChar; + public HiveVarchar myHiveVarchar; + public byte[] myBinary; + public HiveDecimal myDecimal; + public Date myDate; + public Timestamp myTimestamp; + public HiveIntervalYearMonth myIntervalYearMonth; + public HiveIntervalDayTime myIntervalDayTime; + MyTestInnerStruct myStruct; - public MyTestClassSmaller() { + public final static int smallerCount = 17; + + public int randomFill(Random r, ExtraTypeInfo extraTypeInfo) { + int randField = r.nextInt(smallerCount); + int field = 0; + + myBool = (randField == field++) ? null : (r.nextInt(1) == 1); + myByte = (randField == field++) ? null : Byte.valueOf((byte) r.nextInt()); + myShort = (randField == field++) ? null : Short.valueOf((short) r.nextInt()); + myInt = (randField == field++) ? null : Integer.valueOf(r.nextInt()); + myLong = (randField == field++) ? null : Long.valueOf(r.nextLong()); + myFloat = (randField == field++) ? null : Float + .valueOf(r.nextFloat() * 10 - 5); + myDouble = (randField == field++) ? null : Double + .valueOf(r.nextDouble() * 10 - 5); + myString = (randField == field++) ? null : MyTestPrimitiveClass.getRandString(r); + myHiveChar = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveChar(r, extraTypeInfo); + myHiveVarchar = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveVarchar(r, extraTypeInfo); + myBinary = MyTestPrimitiveClass.getRandBinary(r, r.nextInt(1000)); + myDecimal = (randField == field++) ? null : MyTestPrimitiveClass.getRandHiveDecimal(r, extraTypeInfo); + myDate = (randField == field++) ? null : MyTestPrimitiveClass.getRandDate(r); + myTimestamp = (randField == field++) ?
null : MyTestPrimitiveClass.getRandTimestamp(r); + myIntervalYearMonth = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalYearMonth(r); + myIntervalDayTime = (randField == field++) ? null : MyTestPrimitiveClass.getRandIntervalDayTime(r); + + myStruct = (randField == field++) ? null : new MyTestInnerStruct( + r.nextInt(5) - 2, r.nextInt(5) - 2); + return field; } - public MyTestClassSmaller(Byte b, Short s, Integer i, Long l, Float f, - Double d, String st, HiveDecimal bd, Date date, MyTestInnerStruct is) { - myByte = b; - myShort = s; - myInt = i; - myLong = l; - myFloat = f; - myDouble = d; - myString = st; - myDecimal = bd; - myDate = date; - myStruct = is; - } } Index: serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java (revision 0) +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java (working copy) @@ -0,0 +1,205 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.VerifyFast; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BytesWritable; + +public class TestLazyBinaryFast extends TestCase { + + private void testLazyBinaryFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, SerDe[] serdes, StructObjectInspector[] rowOIs, + PrimitiveTypeInfo[][] primitiveTypeInfosArray) throws Throwable { + + LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(MyTestPrimitiveClass.primitiveCount); + + // Try to serialize + BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + Output output = new Output(); + lazyBinarySerializeWrite.set(output); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); + VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveCategory, object); + } + + BytesWritable bytesWritable = new BytesWritable(); + bytesWritable.set(output.getData(), 0, output.getLength()); + serializeWriteBytes[i] = bytesWritable; + } + + // Try to deserialize + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + new LazyBinaryDeserializeRead(primitiveTypeInfos); + + BytesWritable bytesWritable = serializeWriteBytes[i]; + lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); + VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], object); + } + 
lazyBinaryDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned()); + } + + // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite. + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + BytesWritable bytesWritable = serializeWriteBytes[i]; + LazyBinaryStruct lazyBinaryStruct = (LazyBinaryStruct) serdes[i].deserialize(bytesWritable); + + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; + Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfo); + Object object = lazyBinaryStruct.getField(index); + if (expected == null || object == null) { + if (expected != null || object != null) { + fail("SerDe deserialized NULL column mismatch"); + } + } else { + if (!object.equals(expected)) { + fail("SerDe deserialized value does not match"); + } + } + } + } + + // One Writable per row. + BytesWritable serdeBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; + + // Serialize using the SerDe, then below deserialize using DeserializeRead. + Object[] row = new Object[MyTestPrimitiveClass.primitiveCount]; + for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + + // LazyBinary seems to work better with a row object array instead of a Java object... + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); + row[index] = object; + } + + BytesWritable serialized = (BytesWritable) serdes[i].serialize(row, rowOIs[i]); + BytesWritable bytesWritable = new BytesWritable(); + bytesWritable.set(serialized); + byte[] bytes1 = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength()); + if (!Arrays.equals(bytes1, bytes2)) { + fail("SerializeWrite and SerDe serialization does not match"); + } + serdeBytes[i] = bytesWritable; + } + + // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
+ for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + new LazyBinaryDeserializeRead(primitiveTypeInfos); + + BytesWritable bytesWritable = serdeBytes[i]; + lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + + for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + Object object = t.getPrimitiveObject(index); + VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], object); + } + lazyBinaryDeserializeRead.extraFieldsCheck(); + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned()); + TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned()); + } + } + + public void testLazyBinaryFast() throws Throwable { + try { + + int num = 1000; + Random r = new Random(1234); + MyTestPrimitiveClass[] rows = new MyTestPrimitiveClass[num]; + PrimitiveTypeInfo[][] primitiveTypeInfosArray = new PrimitiveTypeInfo[num][]; + for (int i = 0; i < num; i++) { + int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount); + MyTestPrimitiveClass t = new MyTestPrimitiveClass(); + int field = 0; + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, randField, field, extraTypeInfo); + PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); + rows[i] = t; + primitiveTypeInfosArray[i] = primitiveTypeInfos; + } + + // To get the specific type information for CHAR and VARCHAR, seems like we need an + // inspector and SerDe per row... 
+ StructObjectInspector[] rowOIs = new StructObjectInspector[num]; + SerDe[] serdes = new SerDe[num]; + for (int i = 0; i < num; i++) { + MyTestPrimitiveClass t = rows[i]; + + StructObjectInspector rowOI = t.getRowInspector(primitiveTypeInfosArray[i]); + + String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + + rowOIs[i] = rowOI; + serdes[i] = TestLazyBinarySerDe.getSerDe(fieldNames, fieldTypes); + } + + testLazyBinaryFast(rows, serdes, rowOIs, primitiveTypeInfosArray); + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +} \ No newline at end of file Index: serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java (revision 1673556) +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java (working copy) @@ -36,7 +36,9 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyBinary; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; @@ -91,7 +93,7 @@ * @return the initialized LazyBinarySerDe * @throws Throwable */ - private SerDe getSerDe(String fieldNames, String fieldTypes) throws Throwable { + protected static SerDe getSerDe(String fieldNames, String fieldTypes) throws Throwable { Properties schema = new Properties(); schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames); schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes); @@ -194,46 +196,20 @@ int num = 100; for (int itest = 0; itest < num; itest++) { - int randField = r.nextInt(11); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 8 ? null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - List li = randField > 10 ? null : TestBinarySortableSerDe - .getRandIntegerArray(r); - byte[] ba = TestBinarySortableSerDe.getRandBA(r, itest); - Map> mp = new HashMap>(); - String key = TestBinarySortableSerDe.getRandString(r); - List value = randField > 9 ? 
null - : getRandStructArray(r); - mp.put(key, value); - String key1 = TestBinarySortableSerDe.getRandString(r); - mp.put(key1, null); - String key2 = TestBinarySortableSerDe.getRandString(r); - List value2 = getRandStructArray(r); - mp.put(key2, value2); - - MyTestClassBigger input = new MyTestClassBigger(b, s, n, l, f, d, st, bd, date, is, - li, ba, mp); - BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); + MyTestClassBigger t = new MyTestClassBigger(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); + BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1); Object output = serde2.deserialize(bw); - if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { + if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) { System.out.println("structs = " - + SerDeUtils.getJSONString(input, rowOI1)); + + SerDeUtils.getJSONString(t, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); - assertEquals(input, output); + assertEquals(t, output); } } } @@ -263,34 +239,20 @@ int num = 100; for (int itest = 0; itest < num; itest++) { - int randField = r.nextInt(12); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 8 ? null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - List li = randField > 10 ? null : TestBinarySortableSerDe - .getRandIntegerArray(r); - byte[] ba = TestBinarySortableSerDe.getRandBA(r, itest); - MyTestClass input = new MyTestClass(b, s, n, l, f, d, st, bd, date, is, li, ba); - BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); + MyTestClass t = new MyTestClass(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); + BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1); Object output = serde2.deserialize(bw); - if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { + if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) { System.out.println("structs = " - + SerDeUtils.getJSONString(input, rowOI1)); + + SerDeUtils.getJSONString(t, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); - assertEquals(input, output); + assertEquals(t, output); } } } @@ -320,34 +282,21 @@ int num = 100; for (int itest = 0; itest < num; itest++) { - int randField = r.nextInt(12); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? 
null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 8 ? null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - List li = randField > 10 ? null : TestBinarySortableSerDe - .getRandIntegerArray(r); - byte[] ba = TestBinarySortableSerDe.getRandBA(r, itest); - MyTestClass input = new MyTestClass(b, s, n, l, f, d, st, bd, date, is, li,ba); - BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); + MyTestClass t = new MyTestClass(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); + + BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1); Object output = serde2.deserialize(bw); - if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { + if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) { System.out.println("structs = " - + SerDeUtils.getJSONString(input, rowOI1)); + + SerDeUtils.getJSONString(t, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); - assertEquals(input, output); + assertEquals(t, output); } } } @@ -377,33 +326,20 @@ int num = 100; for (int itest = 0; itest < num; itest++) { - int randField = r.nextInt(9); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 7 ? null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 7 ? 
null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - - MyTestClassSmaller input = new MyTestClassSmaller(b, s, n, l, f, d, st, bd, date, - is); - BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); + MyTestClassSmaller t = new MyTestClassSmaller(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); + BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1); Object output = serde2.deserialize(bw); - if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { + if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) { System.out.println("structs = " - + SerDeUtils.getJSONString(input, rowOI1)); + + SerDeUtils.getJSONString(t, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); - assertEquals(input, output); + assertEquals(t, output); } } } @@ -421,13 +357,13 @@ StructObjectInspector soi1 = (StructObjectInspector) serdeOI; List fields1 = soi1.getAllStructFieldRefs(); LazyBinaryMapObjectInspector lazympoi = (LazyBinaryMapObjectInspector) fields1 - .get(12).getFieldObjectInspector(); + .get(MyTestClassBigger.mapPos).getFieldObjectInspector(); ObjectInspector lazympkeyoi = lazympoi.getMapKeyObjectInspector(); ObjectInspector lazympvalueoi = lazympoi.getMapValueObjectInspector(); StructObjectInspector soi2 = rowOI; List fields2 = soi2.getAllStructFieldRefs(); - MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(12) + MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(MyTestClassBigger.mapPos) .getFieldObjectInspector(); ObjectInspector inputmpkeyoi = inputmpoi.getMapKeyObjectInspector(); ObjectInspector inputmpvalueoi = inputmpoi.getMapValueObjectInspector(); @@ -439,18 +375,19 @@ int randFields = r.nextInt(10); for (int i = 0; i < randFields; i++) { - String key = TestBinarySortableSerDe.getRandString(r); + String key = MyTestPrimitiveClass.getRandString(r); int randField = r.nextInt(10); List value = randField > 4 ? null : getRandStructArray(r); mp.put(key, value); + } - MyTestClassBigger input = new MyTestClassBigger(null, null, null, null, - null, null, null, null, null, null, null, null, mp); - BytesWritable bw = (BytesWritable) serde.serialize(input, rowOI); + MyTestClassBigger t = new MyTestClassBigger(); + t.myMap = mp; + BytesWritable bw = (BytesWritable) serde.serialize(t, rowOI); Object output = serde.deserialize(bw); - Object lazyobj = soi1.getStructFieldData(output, fields1.get(12)); + Object lazyobj = soi1.getStructFieldData(output, fields1.get(MyTestClassBigger.mapPos)); Map outputmp = lazympoi.getMap(lazyobj); if (outputmp.size() != mp.size()) { @@ -497,23 +434,9 @@ Random r = new Random(1234); MyTestClass rows[] = new MyTestClass[num]; for (int i = 0; i < num; i++) { - int randField = r.nextInt(12); - Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); - Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); - Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); - Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); - Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); - Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); - String st = randField > 6 ? null : TestBinarySortableSerDe - .getRandString(r); - HiveDecimal bd = randField > 7 ? null : TestBinarySortableSerDe.getRandHiveDecimal(r); - Date date = randField > 8 ? 
null : TestBinarySortableSerDe.getRandDate(r); - MyTestInnerStruct is = randField > 9 ? null : new MyTestInnerStruct(r - .nextInt(5) - 2, r.nextInt(5) - 2); - List li = randField > 10 ? null : TestBinarySortableSerDe - .getRandIntegerArray(r); - byte[] ba = TestBinarySortableSerDe.getRandBA(r, i); - MyTestClass t = new MyTestClass(b, s, n, l, f, d, st, bd, date, is, li, ba); + MyTestClass t = new MyTestClass(); + ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); + t.randomFill(r, extraTypeInfo); rows[i] = t; } Index: service/if/TCLIService.thrift =================================================================== --- service/if/TCLIService.thrift (revision 1673556) +++ service/if/TCLIService.thrift (working copy) @@ -57,6 +57,9 @@ // V7 adds support for delegation token based connection HIVE_CLI_SERVICE_PROTOCOL_V7 + + // V8 adds support for interval types + HIVE_CLI_SERVICE_PROTOCOL_V8 } enum TTypeId { @@ -79,7 +82,9 @@ NULL_TYPE, DATE_TYPE, VARCHAR_TYPE, - CHAR_TYPE + CHAR_TYPE, + INTERVAL_YEAR_MONTH_TYPE, + INTERVAL_DAY_TIME_TYPE } const set PRIMITIVE_TYPES = [ @@ -97,7 +102,9 @@ TTypeId.NULL_TYPE, TTypeId.DATE_TYPE, TTypeId.VARCHAR_TYPE, - TTypeId.CHAR_TYPE + TTypeId.CHAR_TYPE, + TTypeId.INTERVAL_YEAR_MONTH_TYPE, + TTypeId.INTERVAL_DAY_TIME_TYPE ] const set COMPLEX_TYPES = [ @@ -133,6 +140,8 @@ TTypeId.DATE_TYPE: "DATE" TTypeId.VARCHAR_TYPE: "VARCHAR" TTypeId.CHAR_TYPE: "CHAR" + TTypeId.INTERVAL_YEAR_MONTH_TYPE: "INTERVAL_YEAR_MONTH" + TTypeId.INTERVAL_DAY_TIME_TYPE: "INTERVAL_DAY_TIME" } // Thrift does not support recursively defined types or forward declarations, @@ -323,7 +332,7 @@ 4: TI32Value i32Val // INT 5: TI64Value i64Val // BIGINT, TIMESTAMP 6: TDoubleValue doubleVal // FLOAT, DOUBLE - 7: TStringValue stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, BINARY, DECIMAL, NULL + 7: TStringValue stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, BINARY, DECIMAL, NULL, INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME } // Represents a row in a rowset. @@ -542,7 +551,7 @@ // which operations may be executed. struct TOpenSessionReq { // The version of the HiveServer2 protocol that the client is using. - 1: required TProtocolVersion client_protocol = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V7 + 1: required TProtocolVersion client_protocol = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8 // Username and password for authentication. // Depending on the authentication scheme being used, @@ -561,7 +570,7 @@ 1: required TStatus status // The protocol version that the server is using. 
- 2: required TProtocolVersion serverProtocolVersion = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V7 + 2: required TProtocolVersion serverProtocolVersion = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8 // Session Handle 3: optional TSessionHandle sessionHandle Index: service/src/gen/thrift/gen-cpp/TCLIService_constants.cpp =================================================================== --- service/src/gen/thrift/gen-cpp/TCLIService_constants.cpp (revision 1673556) +++ service/src/gen/thrift/gen-cpp/TCLIService_constants.cpp (working copy) @@ -26,6 +26,8 @@ PRIMITIVE_TYPES.insert((TTypeId::type)17); PRIMITIVE_TYPES.insert((TTypeId::type)18); PRIMITIVE_TYPES.insert((TTypeId::type)19); + PRIMITIVE_TYPES.insert((TTypeId::type)20); + PRIMITIVE_TYPES.insert((TTypeId::type)21); COMPLEX_TYPES.insert((TTypeId::type)10); COMPLEX_TYPES.insert((TTypeId::type)11); @@ -55,6 +57,8 @@ TYPE_NAMES.insert(std::make_pair((TTypeId::type)17, "DATE")); TYPE_NAMES.insert(std::make_pair((TTypeId::type)18, "VARCHAR")); TYPE_NAMES.insert(std::make_pair((TTypeId::type)19, "CHAR")); + TYPE_NAMES.insert(std::make_pair((TTypeId::type)20, "INTERVAL_YEAR_MONTH")); + TYPE_NAMES.insert(std::make_pair((TTypeId::type)21, "INTERVAL_DAY_TIME")); CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength"; Index: service/src/gen/thrift/gen-cpp/TCLIService_types.cpp =================================================================== --- service/src/gen/thrift/gen-cpp/TCLIService_types.cpp (revision 1673556) +++ service/src/gen/thrift/gen-cpp/TCLIService_types.cpp (working copy) @@ -17,7 +17,8 @@ TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V4, TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V5, TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V6, - TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V7 + TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V7, + TProtocolVersion::HIVE_CLI_SERVICE_PROTOCOL_V8 }; const char* _kTProtocolVersionNames[] = { "HIVE_CLI_SERVICE_PROTOCOL_V1", @@ -26,9 +27,10 @@ "HIVE_CLI_SERVICE_PROTOCOL_V4", "HIVE_CLI_SERVICE_PROTOCOL_V5", "HIVE_CLI_SERVICE_PROTOCOL_V6", - "HIVE_CLI_SERVICE_PROTOCOL_V7" + "HIVE_CLI_SERVICE_PROTOCOL_V7", + "HIVE_CLI_SERVICE_PROTOCOL_V8" }; -const std::map _TProtocolVersion_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(7, _kTProtocolVersionValues, _kTProtocolVersionNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); +const std::map _TProtocolVersion_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(8, _kTProtocolVersionValues, _kTProtocolVersionNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); int _kTTypeIdValues[] = { TTypeId::BOOLEAN_TYPE, @@ -50,7 +52,9 @@ TTypeId::NULL_TYPE, TTypeId::DATE_TYPE, TTypeId::VARCHAR_TYPE, - TTypeId::CHAR_TYPE + TTypeId::CHAR_TYPE, + TTypeId::INTERVAL_YEAR_MONTH_TYPE, + TTypeId::INTERVAL_DAY_TIME_TYPE }; const char* _kTTypeIdNames[] = { "BOOLEAN_TYPE", @@ -72,9 +76,11 @@ "NULL_TYPE", "DATE_TYPE", "VARCHAR_TYPE", - "CHAR_TYPE" + "CHAR_TYPE", + "INTERVAL_YEAR_MONTH_TYPE", + "INTERVAL_DAY_TIME_TYPE" }; -const std::map _TTypeId_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(20, _kTTypeIdValues, _kTTypeIdNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); +const std::map _TTypeId_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(22, _kTTypeIdValues, _kTTypeIdNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); int _kTStatusCodeValues[] = { TStatusCode::SUCCESS_STATUS, Index: service/src/gen/thrift/gen-cpp/TCLIService_types.h =================================================================== --- service/src/gen/thrift/gen-cpp/TCLIService_types.h 
(revision 1673556) +++ service/src/gen/thrift/gen-cpp/TCLIService_types.h (working copy) @@ -24,7 +24,8 @@ HIVE_CLI_SERVICE_PROTOCOL_V4 = 3, HIVE_CLI_SERVICE_PROTOCOL_V5 = 4, HIVE_CLI_SERVICE_PROTOCOL_V6 = 5, - HIVE_CLI_SERVICE_PROTOCOL_V7 = 6 + HIVE_CLI_SERVICE_PROTOCOL_V7 = 6, + HIVE_CLI_SERVICE_PROTOCOL_V8 = 7 }; }; @@ -51,7 +52,9 @@ NULL_TYPE = 16, DATE_TYPE = 17, VARCHAR_TYPE = 18, - CHAR_TYPE = 19 + CHAR_TYPE = 19, + INTERVAL_YEAR_MONTH_TYPE = 20, + INTERVAL_DAY_TIME_TYPE = 21 }; }; @@ -1946,8 +1949,8 @@ static const char* ascii_fingerprint; // = "C8FD0F306A16C16BDA7B57F58BFAE5B2"; static const uint8_t binary_fingerprint[16]; // = {0xC8,0xFD,0x0F,0x30,0x6A,0x16,0xC1,0x6B,0xDA,0x7B,0x57,0xF5,0x8B,0xFA,0xE5,0xB2}; - TOpenSessionReq() : client_protocol((TProtocolVersion::type)6), username(), password() { - client_protocol = (TProtocolVersion::type)6; + TOpenSessionReq() : client_protocol((TProtocolVersion::type)7), username(), password() { + client_protocol = (TProtocolVersion::type)7; } @@ -2022,8 +2025,8 @@ static const char* ascii_fingerprint; // = "CFE7D7F4E9EC671F2518ED74FEE9F163"; static const uint8_t binary_fingerprint[16]; // = {0xCF,0xE7,0xD7,0xF4,0xE9,0xEC,0x67,0x1F,0x25,0x18,0xED,0x74,0xFE,0xE9,0xF1,0x63}; - TOpenSessionResp() : serverProtocolVersion((TProtocolVersion::type)6) { - serverProtocolVersion = (TProtocolVersion::type)6; + TOpenSessionResp() : serverProtocolVersion((TProtocolVersion::type)7) { + serverProtocolVersion = (TProtocolVersion::type)7; } Index: service/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/service/ThriftHive.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/service/ThriftHive.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/service/ThriftHive.java (working copy) @@ -3023,7 +3023,7 @@ struct.success = new ArrayList(_list0.size); for (int _i1 = 0; _i1 < _list0.size; ++_i1) { - String _elem2; // required + String _elem2; // optional _elem2 = iprot.readString(); struct.success.add(_elem2); } @@ -3122,7 +3122,7 @@ struct.success = new ArrayList(_list5.size); for (int _i6 = 0; _i6 < _list5.size; ++_i6) { - String _elem7; // required + String _elem7; // optional _elem7 = iprot.readString(); struct.success.add(_elem7); } @@ -3785,7 +3785,7 @@ struct.success = new ArrayList(_list8.size); for (int _i9 = 0; _i9 < _list8.size; ++_i9) { - String _elem10; // required + String _elem10; // optional _elem10 = iprot.readString(); struct.success.add(_elem10); } @@ -3884,7 +3884,7 @@ struct.success = new ArrayList(_list13.size); for (int _i14 = 0; _i14 < _list13.size; ++_i14) { - String _elem15; // required + String _elem15; // optional _elem15 = iprot.readString(); struct.success.add(_elem15); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TBinaryColumn.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TBinaryColumn.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TBinaryColumn.java (working copy) @@ -451,7 +451,7 @@ struct.values = new ArrayList(_list110.size); for (int _i111 = 0; _i111 < _list110.size; ++_i111) { - ByteBuffer _elem112; // required + ByteBuffer _elem112; // optional _elem112 = iprot.readBinary(); struct.values.add(_elem112); } @@ -535,7 +535,7 @@ struct.values = new ArrayList(_list115.size); for (int _i116 = 0; _i116 < _list115.size; ++_i116) { 
- ByteBuffer _elem117; // required + ByteBuffer _elem117; // optional _elem117 = iprot.readBinary(); struct.values.add(_elem117); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TBoolColumn.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TBoolColumn.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TBoolColumn.java (working copy) @@ -449,7 +449,7 @@ struct.values = new ArrayList(_list54.size); for (int _i55 = 0; _i55 < _list54.size; ++_i55) { - boolean _elem56; // required + boolean _elem56; // optional _elem56 = iprot.readBool(); struct.values.add(_elem56); } @@ -533,7 +533,7 @@ struct.values = new ArrayList(_list59.size); for (int _i60 = 0; _i60 < _list59.size; ++_i60) { - boolean _elem61; // required + boolean _elem61; // optional _elem61 = iprot.readBool(); struct.values.add(_elem61); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TByteColumn.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TByteColumn.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TByteColumn.java (working copy) @@ -449,7 +449,7 @@ struct.values = new ArrayList(_list62.size); for (int _i63 = 0; _i63 < _list62.size; ++_i63) { - byte _elem64; // required + byte _elem64; // optional _elem64 = iprot.readByte(); struct.values.add(_elem64); } @@ -533,7 +533,7 @@ struct.values = new ArrayList(_list67.size); for (int _i68 = 0; _i68 < _list67.size; ++_i68) { - byte _elem69; // required + byte _elem69; // optional _elem69 = iprot.readByte(); struct.values.add(_elem69); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java (working copy) @@ -50,6 +50,8 @@ PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.DATE_TYPE); PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.VARCHAR_TYPE); PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.CHAR_TYPE); + PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_YEAR_MONTH_TYPE); + PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_DAY_TIME_TYPE); } public static final Set COMPLEX_TYPES = new HashSet(); @@ -88,6 +90,8 @@ TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.DATE_TYPE, "DATE"); TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.VARCHAR_TYPE, "VARCHAR"); TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.CHAR_TYPE, "CHAR"); + TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_YEAR_MONTH_TYPE, "INTERVAL_YEAR_MONTH"); + TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_DAY_TIME_TYPE, "INTERVAL_DAY_TIME"); } public static final String CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength"; Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TDoubleColumn.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TDoubleColumn.java (revision 1673556) +++ 
service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TDoubleColumn.java (working copy) @@ -449,7 +449,7 @@ struct.values = new ArrayList(_list94.size); for (int _i95 = 0; _i95 < _list94.size; ++_i95) { - double _elem96; // required + double _elem96; // optional _elem96 = iprot.readDouble(); struct.values.add(_elem96); } @@ -533,7 +533,7 @@ struct.values = new ArrayList(_list99.size); for (int _i100 = 0; _i100 < _list99.size; ++_i100) { - double _elem101; // required + double _elem101; // optional _elem101 = iprot.readDouble(); struct.values.add(_elem101); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TGetTablesReq.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TGetTablesReq.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TGetTablesReq.java (working copy) @@ -715,7 +715,7 @@ struct.tableTypes = new ArrayList(_list172.size); for (int _i173 = 0; _i173 < _list172.size; ++_i173) { - String _elem174; // required + String _elem174; // optional _elem174 = iprot.readString(); struct.tableTypes.add(_elem174); } @@ -856,7 +856,7 @@ struct.tableTypes = new ArrayList(_list177.size); for (int _i178 = 0; _i178 < _list177.size; ++_i178) { - String _elem179; // required + String _elem179; // optional _elem179 = iprot.readString(); struct.tableTypes.add(_elem179); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TI16Column.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TI16Column.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TI16Column.java (working copy) @@ -449,7 +449,7 @@ struct.values = new ArrayList(_list70.size); for (int _i71 = 0; _i71 < _list70.size; ++_i71) { - short _elem72; // required + short _elem72; // optional _elem72 = iprot.readI16(); struct.values.add(_elem72); } @@ -533,7 +533,7 @@ struct.values = new ArrayList(_list75.size); for (int _i76 = 0; _i76 < _list75.size; ++_i76) { - short _elem77; // required + short _elem77; // optional _elem77 = iprot.readI16(); struct.values.add(_elem77); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TI32Column.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TI32Column.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TI32Column.java (working copy) @@ -449,7 +449,7 @@ struct.values = new ArrayList(_list78.size); for (int _i79 = 0; _i79 < _list78.size; ++_i79) { - int _elem80; // required + int _elem80; // optional _elem80 = iprot.readI32(); struct.values.add(_elem80); } @@ -533,7 +533,7 @@ struct.values = new ArrayList(_list83.size); for (int _i84 = 0; _i84 < _list83.size; ++_i84) { - int _elem85; // required + int _elem85; // optional _elem85 = iprot.readI32(); struct.values.add(_elem85); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TI64Column.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TI64Column.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TI64Column.java (working copy) @@ -449,7 +449,7 @@ struct.values = 
new ArrayList(_list86.size); for (int _i87 = 0; _i87 < _list86.size; ++_i87) { - long _elem88; // required + long _elem88; // optional _elem88 = iprot.readI64(); struct.values.add(_elem88); } @@ -533,7 +533,7 @@ struct.values = new ArrayList(_list91.size); for (int _i92 = 0; _i92 < _list91.size; ++_i92) { - long _elem93; // required + long _elem93; // optional _elem93 = iprot.readI64(); struct.values.add(_elem93); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TOpenSessionReq.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TOpenSessionReq.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TOpenSessionReq.java (working copy) @@ -141,7 +141,7 @@ } public TOpenSessionReq() { - this.client_protocol = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V7; + this.client_protocol = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; } @@ -188,7 +188,7 @@ @Override public void clear() { - this.client_protocol = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V7; + this.client_protocol = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; this.username = null; this.password = null; Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TOpenSessionResp.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TOpenSessionResp.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TOpenSessionResp.java (working copy) @@ -141,7 +141,7 @@ } public TOpenSessionResp() { - this.serverProtocolVersion = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V7; + this.serverProtocolVersion = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; } @@ -191,7 +191,7 @@ @Override public void clear() { this.status = null; - this.serverProtocolVersion = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V7; + this.serverProtocolVersion = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; this.sessionHandle = null; this.configuration = null; Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TProtocolVersion.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TProtocolVersion.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TProtocolVersion.java (working copy) @@ -18,7 +18,8 @@ HIVE_CLI_SERVICE_PROTOCOL_V4(3), HIVE_CLI_SERVICE_PROTOCOL_V5(4), HIVE_CLI_SERVICE_PROTOCOL_V6(5), - HIVE_CLI_SERVICE_PROTOCOL_V7(6); + HIVE_CLI_SERVICE_PROTOCOL_V7(6), + HIVE_CLI_SERVICE_PROTOCOL_V8(7); private final int value; @@ -53,6 +54,8 @@ return HIVE_CLI_SERVICE_PROTOCOL_V6; case 6: return HIVE_CLI_SERVICE_PROTOCOL_V7; + case 7: + return HIVE_CLI_SERVICE_PROTOCOL_V8; default: return null; } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRow.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRow.java (revision 1673556) +++ 
service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRow.java (working copy) @@ -354,7 +354,7 @@ struct.colVals = new ArrayList(_list46.size); for (int _i47 = 0; _i47 < _list46.size; ++_i47) { - TColumnValue _elem48; // required + TColumnValue _elem48; // optional _elem48 = new TColumnValue(); _elem48.read(iprot); struct.colVals.add(_elem48); @@ -425,7 +425,7 @@ struct.colVals = new ArrayList(_list51.size); for (int _i52 = 0; _i52 < _list51.size; ++_i52) { - TColumnValue _elem53; // required + TColumnValue _elem53; // optional _elem53 = new TColumnValue(); _elem53.read(iprot); struct.colVals.add(_elem53); Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRowSet.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRowSet.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRowSet.java (working copy) @@ -549,7 +549,7 @@ struct.rows = new ArrayList(_list118.size); for (int _i119 = 0; _i119 < _list118.size; ++_i119) { - TRow _elem120; // required + TRow _elem120; // optional _elem120 = new TRow(); _elem120.read(iprot); struct.rows.add(_elem120); @@ -568,7 +568,7 @@ struct.columns = new ArrayList(_list121.size); for (int _i122 = 0; _i122 < _list121.size; ++_i122) { - TColumn _elem123; // required + TColumn _elem123; // optional _elem123 = new TColumn(); _elem123.read(iprot); struct.columns.add(_elem123); @@ -673,7 +673,7 @@ struct.rows = new ArrayList(_list128.size); for (int _i129 = 0; _i129 < _list128.size; ++_i129) { - TRow _elem130; // required + TRow _elem130; // optional _elem130 = new TRow(); _elem130.read(iprot); struct.rows.add(_elem130); @@ -687,7 +687,7 @@ struct.columns = new ArrayList(_list131.size); for (int _i132 = 0; _i132 < _list131.size; ++_i132) { - TColumn _elem133; // required + TColumn _elem133; // optional _elem133 = new TColumn(); _elem133.read(iprot); struct.columns.add(_elem133); Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TStatus.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TStatus.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TStatus.java (working copy) @@ -698,7 +698,7 @@ struct.infoMessages = new ArrayList(_list134.size); for (int _i135 = 0; _i135 < _list134.size; ++_i135) { - String _elem136; // required + String _elem136; // optional _elem136 = iprot.readString(); struct.infoMessages.add(_elem136); } @@ -848,7 +848,7 @@ struct.infoMessages = new ArrayList(_list139.size); for (int _i140 = 0; _i140 < _list139.size; ++_i140) { - String _elem141; // required + String _elem141; // optional _elem141 = iprot.readString(); struct.infoMessages.add(_elem141); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TStringColumn.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TStringColumn.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TStringColumn.java (working copy) @@ -449,7 +449,7 @@ struct.values = new ArrayList(_list102.size); for (int _i103 = 0; _i103 < _list102.size; ++_i103) { - String _elem104; // required + String _elem104; // optional _elem104 = iprot.readString(); struct.values.add(_elem104); } @@ -533,7 +533,7 @@ 
struct.values = new ArrayList(_list107.size); for (int _i108 = 0; _i108 < _list107.size; ++_i108) { - String _elem109; // required + String _elem109; // optional _elem109 = iprot.readString(); struct.values.add(_elem109); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTableSchema.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTableSchema.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTableSchema.java (working copy) @@ -354,7 +354,7 @@ struct.columns = new ArrayList(_list38.size); for (int _i39 = 0; _i39 < _list38.size; ++_i39) { - TColumnDesc _elem40; // required + TColumnDesc _elem40; // optional _elem40 = new TColumnDesc(); _elem40.read(iprot); struct.columns.add(_elem40); @@ -425,7 +425,7 @@ struct.columns = new ArrayList(_list43.size); for (int _i44 = 0; _i44 < _list43.size; ++_i44) { - TColumnDesc _elem45; // required + TColumnDesc _elem45; // optional _elem45 = new TColumnDesc(); _elem45.read(iprot); struct.columns.add(_elem45); Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeDesc.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeDesc.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeDesc.java (working copy) @@ -354,7 +354,7 @@ struct.types = new ArrayList(_list30.size); for (int _i31 = 0; _i31 < _list30.size; ++_i31) { - TTypeEntry _elem32; // required + TTypeEntry _elem32; // optional _elem32 = new TTypeEntry(); _elem32.read(iprot); struct.types.add(_elem32); @@ -425,7 +425,7 @@ struct.types = new ArrayList(_list35.size); for (int _i36 = 0; _i36 < _list35.size; ++_i36) { - TTypeEntry _elem37; // required + TTypeEntry _elem37; // optional _elem37 = new TTypeEntry(); _elem37.read(iprot); struct.types.add(_elem37); Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeId.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeId.java (revision 1673556) +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeId.java (working copy) @@ -31,7 +31,9 @@ NULL_TYPE(16), DATE_TYPE(17), VARCHAR_TYPE(18), - CHAR_TYPE(19); + CHAR_TYPE(19), + INTERVAL_YEAR_MONTH_TYPE(20), + INTERVAL_DAY_TIME_TYPE(21); private final int value; @@ -92,6 +94,10 @@ return VARCHAR_TYPE; case 19: return CHAR_TYPE; + case 20: + return INTERVAL_YEAR_MONTH_TYPE; + case 21: + return INTERVAL_DAY_TIME_TYPE; default: return null; } Index: service/src/gen/thrift/gen-py/TCLIService/constants.py =================================================================== --- service/src/gen/thrift/gen-py/TCLIService/constants.py (revision 1673556) +++ service/src/gen/thrift/gen-py/TCLIService/constants.py (working copy) @@ -25,6 +25,8 @@ 17, 18, 19, + 20, + 21, ]) COMPLEX_TYPES = set([ 10, @@ -57,6 +59,8 @@ 17 : "DATE", 18 : "VARCHAR", 19 : "CHAR", + 20 : "INTERVAL_YEAR_MONTH", + 21 : "INTERVAL_DAY_TIME", } CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength" PRECISION = "precision" Index: service/src/gen/thrift/gen-py/TCLIService/ttypes.py =================================================================== --- service/src/gen/thrift/gen-py/TCLIService/ttypes.py (revision 1673556) +++ 
service/src/gen/thrift/gen-py/TCLIService/ttypes.py (working copy) @@ -24,6 +24,7 @@ HIVE_CLI_SERVICE_PROTOCOL_V5 = 4 HIVE_CLI_SERVICE_PROTOCOL_V6 = 5 HIVE_CLI_SERVICE_PROTOCOL_V7 = 6 + HIVE_CLI_SERVICE_PROTOCOL_V8 = 7 _VALUES_TO_NAMES = { 0: "HIVE_CLI_SERVICE_PROTOCOL_V1", @@ -33,6 +34,7 @@ 4: "HIVE_CLI_SERVICE_PROTOCOL_V5", 5: "HIVE_CLI_SERVICE_PROTOCOL_V6", 6: "HIVE_CLI_SERVICE_PROTOCOL_V7", + 7: "HIVE_CLI_SERVICE_PROTOCOL_V8", } _NAMES_TO_VALUES = { @@ -43,6 +45,7 @@ "HIVE_CLI_SERVICE_PROTOCOL_V5": 4, "HIVE_CLI_SERVICE_PROTOCOL_V6": 5, "HIVE_CLI_SERVICE_PROTOCOL_V7": 6, + "HIVE_CLI_SERVICE_PROTOCOL_V8": 7, } class TTypeId: @@ -66,6 +69,8 @@ DATE_TYPE = 17 VARCHAR_TYPE = 18 CHAR_TYPE = 19 + INTERVAL_YEAR_MONTH_TYPE = 20 + INTERVAL_DAY_TIME_TYPE = 21 _VALUES_TO_NAMES = { 0: "BOOLEAN_TYPE", @@ -88,6 +93,8 @@ 17: "DATE_TYPE", 18: "VARCHAR_TYPE", 19: "CHAR_TYPE", + 20: "INTERVAL_YEAR_MONTH_TYPE", + 21: "INTERVAL_DAY_TIME_TYPE", } _NAMES_TO_VALUES = { @@ -111,6 +118,8 @@ "DATE_TYPE": 17, "VARCHAR_TYPE": 18, "CHAR_TYPE": 19, + "INTERVAL_YEAR_MONTH_TYPE": 20, + "INTERVAL_DAY_TIME_TYPE": 21, } class TStatusCode: @@ -3245,7 +3254,7 @@ thrift_spec = ( None, # 0 - (1, TType.I32, 'client_protocol', None, 6, ), # 1 + (1, TType.I32, 'client_protocol', None, 7, ), # 1 (2, TType.STRING, 'username', None, None, ), # 2 (3, TType.STRING, 'password', None, None, ), # 3 (4, TType.MAP, 'configuration', (TType.STRING,None,TType.STRING,None), None, ), # 4 @@ -3354,7 +3363,7 @@ thrift_spec = ( None, # 0 (1, TType.STRUCT, 'status', (TStatus, TStatus.thrift_spec), None, ), # 1 - (2, TType.I32, 'serverProtocolVersion', None, 6, ), # 2 + (2, TType.I32, 'serverProtocolVersion', None, 7, ), # 2 (3, TType.STRUCT, 'sessionHandle', (TSessionHandle, TSessionHandle.thrift_spec), None, ), # 3 (4, TType.MAP, 'configuration', (TType.STRING,None,TType.STRING,None), None, ), # 4 ) Index: service/src/gen/thrift/gen-rb/t_c_l_i_service_constants.rb =================================================================== --- service/src/gen/thrift/gen-rb/t_c_l_i_service_constants.rb (revision 1673556) +++ service/src/gen/thrift/gen-rb/t_c_l_i_service_constants.rb (working copy) @@ -23,6 +23,8 @@ 17, 18, 19, + 20, + 21, ]) COMPLEX_TYPES = Set.new([ @@ -58,6 +60,8 @@ 17 => %q"DATE", 18 => %q"VARCHAR", 19 => %q"CHAR", + 20 => %q"INTERVAL_YEAR_MONTH", + 21 => %q"INTERVAL_DAY_TIME", } CHARACTER_MAXIMUM_LENGTH = %q"characterMaximumLength" Index: service/src/gen/thrift/gen-rb/t_c_l_i_service_types.rb =================================================================== --- service/src/gen/thrift/gen-rb/t_c_l_i_service_types.rb (revision 1673556) +++ service/src/gen/thrift/gen-rb/t_c_l_i_service_types.rb (working copy) @@ -14,8 +14,9 @@ HIVE_CLI_SERVICE_PROTOCOL_V5 = 4 HIVE_CLI_SERVICE_PROTOCOL_V6 = 5 HIVE_CLI_SERVICE_PROTOCOL_V7 = 6 - VALUE_MAP = {0 => "HIVE_CLI_SERVICE_PROTOCOL_V1", 1 => "HIVE_CLI_SERVICE_PROTOCOL_V2", 2 => "HIVE_CLI_SERVICE_PROTOCOL_V3", 3 => "HIVE_CLI_SERVICE_PROTOCOL_V4", 4 => "HIVE_CLI_SERVICE_PROTOCOL_V5", 5 => "HIVE_CLI_SERVICE_PROTOCOL_V6", 6 => "HIVE_CLI_SERVICE_PROTOCOL_V7"} - VALID_VALUES = Set.new([HIVE_CLI_SERVICE_PROTOCOL_V1, HIVE_CLI_SERVICE_PROTOCOL_V2, HIVE_CLI_SERVICE_PROTOCOL_V3, HIVE_CLI_SERVICE_PROTOCOL_V4, HIVE_CLI_SERVICE_PROTOCOL_V5, HIVE_CLI_SERVICE_PROTOCOL_V6, HIVE_CLI_SERVICE_PROTOCOL_V7]).freeze + HIVE_CLI_SERVICE_PROTOCOL_V8 = 7 + VALUE_MAP = {0 => "HIVE_CLI_SERVICE_PROTOCOL_V1", 1 => "HIVE_CLI_SERVICE_PROTOCOL_V2", 2 => "HIVE_CLI_SERVICE_PROTOCOL_V3", 3 => "HIVE_CLI_SERVICE_PROTOCOL_V4", 4 => 
"HIVE_CLI_SERVICE_PROTOCOL_V5", 5 => "HIVE_CLI_SERVICE_PROTOCOL_V6", 6 => "HIVE_CLI_SERVICE_PROTOCOL_V7", 7 => "HIVE_CLI_SERVICE_PROTOCOL_V8"} + VALID_VALUES = Set.new([HIVE_CLI_SERVICE_PROTOCOL_V1, HIVE_CLI_SERVICE_PROTOCOL_V2, HIVE_CLI_SERVICE_PROTOCOL_V3, HIVE_CLI_SERVICE_PROTOCOL_V4, HIVE_CLI_SERVICE_PROTOCOL_V5, HIVE_CLI_SERVICE_PROTOCOL_V6, HIVE_CLI_SERVICE_PROTOCOL_V7, HIVE_CLI_SERVICE_PROTOCOL_V8]).freeze end module TTypeId @@ -39,8 +40,10 @@ DATE_TYPE = 17 VARCHAR_TYPE = 18 CHAR_TYPE = 19 - VALUE_MAP = {0 => "BOOLEAN_TYPE", 1 => "TINYINT_TYPE", 2 => "SMALLINT_TYPE", 3 => "INT_TYPE", 4 => "BIGINT_TYPE", 5 => "FLOAT_TYPE", 6 => "DOUBLE_TYPE", 7 => "STRING_TYPE", 8 => "TIMESTAMP_TYPE", 9 => "BINARY_TYPE", 10 => "ARRAY_TYPE", 11 => "MAP_TYPE", 12 => "STRUCT_TYPE", 13 => "UNION_TYPE", 14 => "USER_DEFINED_TYPE", 15 => "DECIMAL_TYPE", 16 => "NULL_TYPE", 17 => "DATE_TYPE", 18 => "VARCHAR_TYPE", 19 => "CHAR_TYPE"} - VALID_VALUES = Set.new([BOOLEAN_TYPE, TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, FLOAT_TYPE, DOUBLE_TYPE, STRING_TYPE, TIMESTAMP_TYPE, BINARY_TYPE, ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, UNION_TYPE, USER_DEFINED_TYPE, DECIMAL_TYPE, NULL_TYPE, DATE_TYPE, VARCHAR_TYPE, CHAR_TYPE]).freeze + INTERVAL_YEAR_MONTH_TYPE = 20 + INTERVAL_DAY_TIME_TYPE = 21 + VALUE_MAP = {0 => "BOOLEAN_TYPE", 1 => "TINYINT_TYPE", 2 => "SMALLINT_TYPE", 3 => "INT_TYPE", 4 => "BIGINT_TYPE", 5 => "FLOAT_TYPE", 6 => "DOUBLE_TYPE", 7 => "STRING_TYPE", 8 => "TIMESTAMP_TYPE", 9 => "BINARY_TYPE", 10 => "ARRAY_TYPE", 11 => "MAP_TYPE", 12 => "STRUCT_TYPE", 13 => "UNION_TYPE", 14 => "USER_DEFINED_TYPE", 15 => "DECIMAL_TYPE", 16 => "NULL_TYPE", 17 => "DATE_TYPE", 18 => "VARCHAR_TYPE", 19 => "CHAR_TYPE", 20 => "INTERVAL_YEAR_MONTH_TYPE", 21 => "INTERVAL_DAY_TIME_TYPE"} + VALID_VALUES = Set.new([BOOLEAN_TYPE, TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, FLOAT_TYPE, DOUBLE_TYPE, STRING_TYPE, TIMESTAMP_TYPE, BINARY_TYPE, ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, UNION_TYPE, USER_DEFINED_TYPE, DECIMAL_TYPE, NULL_TYPE, DATE_TYPE, VARCHAR_TYPE, CHAR_TYPE, INTERVAL_YEAR_MONTH_TYPE, INTERVAL_DAY_TIME_TYPE]).freeze end module TStatusCode @@ -947,7 +950,7 @@ CONFIGURATION = 4 FIELDS = { - CLIENT_PROTOCOL => {:type => ::Thrift::Types::I32, :name => 'client_protocol', :default => 6, :enum_class => ::TProtocolVersion}, + CLIENT_PROTOCOL => {:type => ::Thrift::Types::I32, :name => 'client_protocol', :default => 7, :enum_class => ::TProtocolVersion}, USERNAME => {:type => ::Thrift::Types::STRING, :name => 'username', :optional => true}, PASSWORD => {:type => ::Thrift::Types::STRING, :name => 'password', :optional => true}, CONFIGURATION => {:type => ::Thrift::Types::MAP, :name => 'configuration', :key => {:type => ::Thrift::Types::STRING}, :value => {:type => ::Thrift::Types::STRING}, :optional => true} @@ -974,7 +977,7 @@ FIELDS = { STATUS => {:type => ::Thrift::Types::STRUCT, :name => 'status', :class => ::TStatus}, - SERVERPROTOCOLVERSION => {:type => ::Thrift::Types::I32, :name => 'serverProtocolVersion', :default => 6, :enum_class => ::TProtocolVersion}, + SERVERPROTOCOLVERSION => {:type => ::Thrift::Types::I32, :name => 'serverProtocolVersion', :default => 7, :enum_class => ::TProtocolVersion}, SESSIONHANDLE => {:type => ::Thrift::Types::STRUCT, :name => 'sessionHandle', :class => ::TSessionHandle, :optional => true}, CONFIGURATION => {:type => ::Thrift::Types::MAP, :name => 'configuration', :key => {:type => ::Thrift::Types::STRING}, :value => {:type => ::Thrift::Types::STRING}, :optional => true} } Index: 
service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java =================================================================== --- service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java (revision 1673556) +++ service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java (working copy) @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.shims.HadoopShims.KerberosNameShim; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.thrift.DBTokenStore; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; import org.apache.hadoop.security.SecurityUtil; @@ -110,9 +111,17 @@ conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL)); // start delegation token manager try { - HMSHandler baseHandler = new HiveMetaStore.HMSHandler( - "new db based metaserver", conf, true); - saslServer.startDelegationTokenSecretManager(conf, baseHandler.getMS(), ServerMode.HIVESERVER2); + // rawStore is only necessary for DBTokenStore + Object rawStore = null; + String tokenStoreClass = conf.getVar(HiveConf.ConfVars.METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS); + + if (tokenStoreClass.equals(DBTokenStore.class.getName())) { + HMSHandler baseHandler = new HiveMetaStore.HMSHandler( + "new db based metaserver", conf, true); + rawStore = baseHandler.getMS(); + } + + saslServer.startDelegationTokenSecretManager(conf, rawStore, ServerMode.HIVESERVER2); } catch (MetaException|IOException e) { throw new TTransportException("Failed to start token manager", e); Index: service/src/java/org/apache/hive/service/cli/CLIServiceUtils.java =================================================================== --- service/src/java/org/apache/hive/service/cli/CLIServiceUtils.java (revision 1673556) +++ service/src/java/org/apache/hive/service/cli/CLIServiceUtils.java (working copy) @@ -18,6 +18,9 @@ package org.apache.hive.service.cli; +import org.apache.log4j.Layout; +import org.apache.log4j.PatternLayout; + /** * CLIServiceUtils. * @@ -26,6 +29,10 @@ private static final char SEARCH_STRING_ESCAPE = '\\'; + public static final Layout verboseLayout = new PatternLayout( + "%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n"); + public static final Layout nonVerboseLayout = new PatternLayout( + "%-5p : %m%n"); /** * Convert a SQL search pattern into an equivalent Java Regex. 
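For context on the two log4j layouts added to CLIServiceUtils above, here is a minimal standalone sketch (not part of this patch; the LayoutDemo class, the logger name, and the message are invented for illustration) showing how the same logging event is rendered by the verbose and non-verbose patterns that LogDivertAppender switches between below:

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.apache.log4j.spi.LoggingEvent;

public class LayoutDemo {
  public static void main(String[] args) {
    Logger logger = Logger.getLogger("org.apache.hive.service.cli.Demo");
    // Build one event and format it with both layouts.
    LoggingEvent event = new LoggingEvent(Logger.class.getName(), logger,
        Level.INFO, "Query compiled", null);

    PatternLayout verbose = new PatternLayout("%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n");
    PatternLayout nonVerbose = new PatternLayout("%-5p : %m%n");

    // Prints something like: 15/04/20 11:02:33 INFO cli.Demo: Query compiled
    System.out.print(verbose.format(event));
    // Prints something like: INFO  : Query compiled
    System.out.print(nonVerbose.format(event));
  }
}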
Index: service/src/java/org/apache/hive/service/cli/ColumnValue.java =================================================================== --- service/src/java/org/apache/hive/service/cli/ColumnValue.java (revision 1673556) +++ service/src/java/org/apache/hive/service/cli/ColumnValue.java (working copy) @@ -24,6 +24,8 @@ import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hive.service.cli.thrift.TBoolValue; import org.apache.hive.service.cli.thrift.TByteValue; @@ -144,6 +146,22 @@ return TColumnValue.stringVal(tStrValue); } + private static TColumnValue stringValue(HiveIntervalYearMonth value) { + TStringValue tStrValue = new TStringValue(); + if (value != null) { + tStrValue.setValue(value.toString()); + } + return TColumnValue.stringVal(tStrValue); + } + + private static TColumnValue stringValue(HiveIntervalDayTime value) { + TStringValue tStrValue = new TStringValue(); + if (value != null) { + tStrValue.setValue(value.toString()); + } + return TColumnValue.stringVal(tStrValue); + } + public static TColumnValue toTColumnValue(Type type, Object value) { switch (type) { case BOOLEAN_TYPE: @@ -170,6 +188,10 @@ return dateValue((Date)value); case TIMESTAMP_TYPE: return timestampValue((Timestamp)value); + case INTERVAL_YEAR_MONTH_TYPE: + return stringValue((HiveIntervalYearMonth) value); + case INTERVAL_DAY_TIME_TYPE: + return stringValue((HiveIntervalDayTime) value); case DECIMAL_TYPE: return stringValue(((HiveDecimal)value)); case BINARY_TYPE: Index: service/src/java/org/apache/hive/service/cli/Type.java =================================================================== --- service/src/java/org/apache/hive/service/cli/Type.java (revision 1673556) +++ service/src/java/org/apache/hive/service/cli/Type.java (working copy) @@ -69,6 +69,12 @@ TIMESTAMP_TYPE("TIMESTAMP", java.sql.Types.TIMESTAMP, TTypeId.TIMESTAMP_TYPE), + INTERVAL_YEAR_MONTH_TYPE("INTERVAL_YEAR_MONTH", + java.sql.Types.OTHER, + TTypeId.INTERVAL_YEAR_MONTH_TYPE), + INTERVAL_DAY_TIME_TYPE("INTERVAL_DAY_TIME", + java.sql.Types.OTHER, + TTypeId.INTERVAL_DAY_TIME_TYPE), BINARY_TYPE("BINARY", java.sql.Types.BINARY, TTypeId.BINARY_TYPE), Index: service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java =================================================================== --- service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java (revision 1673556) +++ service/src/java/org/apache/hive/service/cli/operation/LogDivertAppender.java (working copy) @@ -18,10 +18,16 @@ package org.apache.hive.service.cli.operation; import java.io.CharArrayWriter; +import java.util.Enumeration; import java.util.regex.Pattern; import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.session.OperationLog; +import org.apache.hadoop.hive.ql.session.OperationLog.LoggingLevel; +import org.apache.hive.service.cli.CLIServiceUtils; +import org.apache.log4j.Appender; +import org.apache.log4j.ConsoleAppender; import org.apache.log4j.Layout; import org.apache.log4j.Logger; import org.apache.log4j.WriterAppender; @@ -36,6 +42,8 @@ public class LogDivertAppender extends WriterAppender { private static final Logger LOG = Logger.getLogger(LogDivertAppender.class.getName()); private final OperationManager 
operationManager; + private boolean isVerbose; + private Layout verboseLayout; /** * A log filter that filters messages coming from the logger with the given names. @@ -45,18 +53,71 @@ * White list filter is used for less verbose log collection */ private static class NameFilter extends Filter { - private final Pattern namePattern; - private final boolean excludeMatches; + private Pattern namePattern; + private LoggingLevel loggingMode; + private OperationManager operationManager; - public NameFilter(boolean isExclusionFilter, String [] loggerNames) { - this.excludeMatches = isExclusionFilter; - String matchRegex = Joiner.on("|").join(loggerNames); - this.namePattern = Pattern.compile(matchRegex); + /* Patterns that are excluded in verbose logging level. + * Filter out messages coming from log processing classes, or we'll run an infinite loop. + */ + private static final Pattern verboseExcludeNamePattern = Pattern.compile(Joiner.on("|"). + join(new String[] {LOG.getName(), OperationLog.class.getName(), + OperationManager.class.getName()})); + + /* Patterns that are included in execution logging level. + * In execution mode, show only select logger messages. + */ + private static final Pattern executionIncludeNamePattern = Pattern.compile(Joiner.on("|"). + join(new String[] {"org.apache.hadoop.mapreduce.JobSubmitter", + "org.apache.hadoop.mapreduce.Job", "SessionState", Task.class.getName(), + "org.apache.hadoop.hive.ql.exec.spark.status.SparkJobMonitor"})); + + /* Patterns that are included in performance logging level. + * In performance mode, show execution and performance logger messages. + */ + private static final Pattern performanceIncludeNamePattern = Pattern.compile( + executionIncludeNamePattern.pattern() + "|" + PerfLogger.class.getName()); + + private void setCurrentNamePattern(OperationLog.LoggingLevel mode) { + if (mode == OperationLog.LoggingLevel.VERBOSE) { + this.namePattern = verboseExcludeNamePattern; + } else if (mode == OperationLog.LoggingLevel.EXECUTION) { + this.namePattern = executionIncludeNamePattern; + } else if (mode == OperationLog.LoggingLevel.PERFORMANCE) { + this.namePattern = performanceIncludeNamePattern; + } } + public NameFilter( + OperationLog.LoggingLevel loggingMode, OperationManager op) { + this.operationManager = op; + this.loggingMode = loggingMode; + setCurrentNamePattern(loggingMode); + } + @Override public int decide(LoggingEvent ev) { + OperationLog log = operationManager.getOperationLogByThread(); + boolean excludeMatches = (loggingMode == OperationLog.LoggingLevel.VERBOSE); + + if (log == null) { + return Filter.DENY; + } + + OperationLog.LoggingLevel currentLoggingMode = log.getOpLoggingLevel(); + // If logging is disabled, deny everything. + if (currentLoggingMode == OperationLog.LoggingLevel.NONE) { + return Filter.DENY; + } + // Look at the current session's setting + // and set the pattern and excludeMatches accordingly. 
+ if (currentLoggingMode != loggingMode) { + loggingMode = currentLoggingMode; + setCurrentNamePattern(loggingMode); + } + boolean isMatch = namePattern.matcher(ev.getLoggerName()).matches(); + if (excludeMatches == isMatch) { // Deny if this is black-list filter (excludeMatches = true) and it // matched @@ -70,25 +131,61 @@ /** This is where the log message will go to */ private final CharArrayWriter writer = new CharArrayWriter(); - public LogDivertAppender(Layout layout, OperationManager operationManager, boolean isVerbose) { - setLayout(layout); + private void setLayout (boolean isVerbose, Layout lo) { + if (isVerbose) { + if (lo == null) { + lo = CLIServiceUtils.verboseLayout; + LOG.info("Cannot find a Layout from a ConsoleAppender. Using default Layout pattern."); + } + } else { + lo = CLIServiceUtils.nonVerboseLayout; + } + setLayout(lo); + } + + private void initLayout(boolean isVerbose) { + // There should be a ConsoleAppender. Copy its Layout. + Logger root = Logger.getRootLogger(); + Layout layout = null; + + Enumeration appenders = root.getAllAppenders(); + while (appenders.hasMoreElements()) { + Appender ap = (Appender) appenders.nextElement(); + if (ap.getClass().equals(ConsoleAppender.class)) { + layout = ap.getLayout(); + break; + } + } + setLayout(isVerbose, layout); + } + + public LogDivertAppender(OperationManager operationManager, + OperationLog.LoggingLevel loggingMode) { + isVerbose = (loggingMode == OperationLog.LoggingLevel.VERBOSE); + initLayout(isVerbose); setWriter(writer); setName("LogDivertAppender"); this.operationManager = operationManager; + this.verboseLayout = isVerbose ? layout : CLIServiceUtils.verboseLayout; + addFilter(new NameFilter(loggingMode, operationManager)); + } - if (isVerbose) { - // Filter out messages coming from log processing classes, or we'll run an - // infinite loop. - String[] exclLoggerNames = { LOG.getName(), OperationLog.class.getName(), - OperationManager.class.getName() }; - addFilter(new NameFilter(true, exclLoggerNames)); - } else { - // in non verbose mode, show only select logger messages - String[] inclLoggerNames = { "org.apache.hadoop.mapreduce.JobSubmitter", - "org.apache.hadoop.mapreduce.Job", "SessionState", Task.class.getName(), - "org.apache.hadoop.hive.ql.exec.spark.status.SparkJobMonitor"}; - addFilter(new NameFilter(false, inclLoggerNames)); + @Override + public void doAppend(LoggingEvent event) { + OperationLog log = operationManager.getOperationLogByThread(); + + // Set current layout depending on the verbose/non-verbose mode. + if (log != null) { + boolean isCurrModeVerbose = (log.getOpLoggingLevel() == OperationLog.LoggingLevel.VERBOSE); + + // If there is a logging level change from verbose->non-verbose or vice-versa since + // the last subAppend call, change the layout to preserve consistency. 
+ if (isCurrModeVerbose != isVerbose) { + isVerbose = isCurrModeVerbose; + setLayout(isVerbose, verboseLayout); + } } + super.doAppend(event); } /** Index: service/src/java/org/apache/hive/service/cli/operation/Operation.java =================================================================== --- service/src/java/org/apache/hive/service/cli/operation/Operation.java (revision 1673556) +++ service/src/java/org/apache/hive/service/cli/operation/Operation.java (working copy) @@ -210,7 +210,7 @@ // create OperationLog object with above log file try { - operationLog = new OperationLog(opHandle.toString(), operationLogFile); + operationLog = new OperationLog(opHandle.toString(), operationLogFile, parentSession.getHiveConf()); } catch (FileNotFoundException e) { LOG.warn("Unable to instantiate OperationLog object for operation: " + opHandle, e); Index: service/src/java/org/apache/hive/service/cli/operation/OperationManager.java =================================================================== --- service/src/java/org/apache/hive/service/cli/operation/OperationManager.java (revision 1673556) +++ service/src/java/org/apache/hive/service/cli/operation/OperationManager.java (working copy) @@ -20,7 +20,6 @@ import java.sql.SQLException; import java.util.ArrayList; -import java.util.Enumeration; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -42,10 +41,7 @@ import org.apache.hive.service.cli.TableSchema; import org.apache.hive.service.cli.session.HiveSession; import org.apache.log4j.Appender; -import org.apache.log4j.ConsoleAppender; -import org.apache.log4j.Layout; import org.apache.log4j.Logger; -import org.apache.log4j.PatternLayout; /** * OperationManager. @@ -54,7 +50,6 @@ public class OperationManager extends AbstractService { private final Log LOG = LogFactory.getLog(OperationManager.class.getName()); - private HiveConf hiveConf; private final Map handleToOperation = new HashMap(); @@ -64,10 +59,9 @@ @Override public synchronized void init(HiveConf hiveConf) { - this.hiveConf = hiveConf; if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) { - boolean isVerbose = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_VERBOSE); - initOperationLogCapture(isVerbose); + initOperationLogCapture(hiveConf.getVar( + HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL)); } else { LOG.debug("Operation level logging is turned off"); } @@ -86,34 +80,10 @@ super.stop(); } - private void initOperationLogCapture(boolean isVerbose) { - // There should be a ConsoleAppender. Copy its Layout. - Logger root = Logger.getRootLogger(); - Layout layout = null; - - Enumeration appenders = root.getAllAppenders(); - while (appenders.hasMoreElements()) { - Appender ap = (Appender) appenders.nextElement(); - if (ap.getClass().equals(ConsoleAppender.class)) { - layout = ap.getLayout(); - break; - } - } - - final String VERBOSE_PATTERN = "%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n"; - final String NONVERBOSE_PATTERN = "%-5p : %m%n"; - - if (isVerbose) { - if (layout == null) { - layout = new PatternLayout(VERBOSE_PATTERN); - LOG.info("Cannot find a Layout from a ConsoleAppender. Using default Layout pattern."); - } - } else { - layout = new PatternLayout(NONVERBOSE_PATTERN); - } + private void initOperationLogCapture(String loggingMode) { // Register another Appender (with the same layout) that talks to us. 
- Appender ap = new LogDivertAppender(layout, this, isVerbose); - root.addAppender(ap); + Appender ap = new LogDivertAppender(this, OperationLog.getLoggingLevel(loggingMode)); + Logger.getRootLogger().addAppender(ap); } public ExecuteStatementOperation newExecuteStatementOperation(HiveSession parentSession, Index: service/src/java/org/apache/hive/service/cli/session/HiveSession.java =================================================================== --- service/src/java/org/apache/hive/service/cli/session/HiveSession.java (revision 1673556) +++ service/src/java/org/apache/hive/service/cli/session/HiveSession.java (working copy) @@ -151,4 +151,6 @@ throws HiveSQLException; void closeExpiredOperations(); + + long getNoOperationTime(); } Index: service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java =================================================================== --- service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java (revision 1673556) +++ service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java (working copy) @@ -91,6 +91,7 @@ private boolean isOperationLogEnabled; private File sessionLogDir; private volatile long lastAccessTime; + private volatile long lastIdleTime; public HiveSessionImpl(TProtocolVersion protocol, String username, String password, HiveConf serverhiveConf, String ipAddress) { @@ -147,6 +148,7 @@ configureSession(sessionConfMap); } lastAccessTime = System.currentTimeMillis(); + lastIdleTime = lastAccessTime; } /** @@ -295,6 +297,11 @@ if (userAccess) { lastAccessTime = System.currentTimeMillis(); } + if (opHandleSet.isEmpty()) { + lastIdleTime = System.currentTimeMillis(); + } else { + lastIdleTime = 0; + } } @Override @@ -605,6 +612,11 @@ } } + @Override + public long getNoOperationTime() { + return lastIdleTime > 0 ? 
System.currentTimeMillis() - lastIdleTime : 0; + } + private void closeTimedOutOperations(List<Operation> operations) { acquire(false); try { Index: service/src/java/org/apache/hive/service/cli/session/SessionManager.java =================================================================== --- service/src/java/org/apache/hive/service/cli/session/SessionManager.java (revision 1673556) +++ service/src/java/org/apache/hive/service/cli/session/SessionManager.java (working copy) @@ -62,6 +62,7 @@ private long checkInterval; private long sessionTimeout; + private boolean checkOperation; private volatile boolean shutdown; // The HiveServer2 instance running this service @@ -107,6 +108,8 @@ hiveConf, ConfVars.HIVE_SERVER2_SESSION_CHECK_INTERVAL, TimeUnit.MILLISECONDS); sessionTimeout = HiveConf.getTimeVar( hiveConf, ConfVars.HIVE_SERVER2_IDLE_SESSION_TIMEOUT, TimeUnit.MILLISECONDS); + checkOperation = HiveConf.getBoolVar(hiveConf, + ConfVars.HIVE_SERVER2_IDLE_SESSION_CHECK_OPERATION); } private void initOperationLogRootDir() { @@ -155,7 +158,8 @@ for (sleepInterval(interval); !shutdown; sleepInterval(interval)) { long current = System.currentTimeMillis(); for (HiveSession session : new ArrayList<HiveSession>(handleToSession.values())) { - if (sessionTimeout > 0 && session.getLastAccessTime() + sessionTimeout <= current) { + if (sessionTimeout > 0 && session.getLastAccessTime() + sessionTimeout <= current + && (!checkOperation || session.getNoOperationTime() > sessionTimeout)) { SessionHandle handle = session.getSessionHandle(); LOG.warn("Session " + handle + " is Timed-out (last access : " + new Date(session.getLastAccessTime()) + ") and will be closed"); Index: service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java =================================================================== --- service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java (revision 1673556) +++ service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java (working copy) @@ -94,9 +94,10 @@ server = new TThreadPoolServer(sargs); server.setServerEventHandler(serverEventHandler); server.serve(); - String msg = "Started " + ThriftBinaryCLIService.class.getSimpleName() + " on port " + String msg = "Starting " + ThriftBinaryCLIService.class.getSimpleName() + " on port " + portNum + " with " + minWorkerThreads + "..." + maxWorkerThreads + " worker threads"; LOG.info(msg); + server.serve(); } catch (Throwable t) { LOG.fatal( "Error starting HiveServer2: could not start " Index: service/src/java/org/apache/hive/service/server/HiveServer2.java =================================================================== --- service/src/java/org/apache/hive/service/server/HiveServer2.java (revision 1673556) +++ service/src/java/org/apache/hive/service/server/HiveServer2.java (working copy) @@ -297,6 +297,18 @@ } } + private static void startPauseMonitor(HiveConf conf) throws Exception { + try { + Class.forName("org.apache.hadoop.util.JvmPauseMonitor"); + org.apache.hadoop.util.JvmPauseMonitor pauseMonitor = + new org.apache.hadoop.util.JvmPauseMonitor(conf); + pauseMonitor.start(); + } catch (Throwable t) { + LOG.warn("Could not initiate the JvmPauseMonitor thread."
+ + " GCs and Pauses may not be warned upon.", t); + } + } + private static void startHiveServer2() throws Throwable { long attempts = 0, maxAttempts = 1; while (true) { @@ -308,6 +320,7 @@ server = new HiveServer2(); server.init(hiveConf); server.start(); + startPauseMonitor(hiveConf); // If we're supporting dynamic service discovery, we'll add the service uri for this // HiveServer2 instance to Zookeeper as a znode. if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY)) { Index: service/src/test/org/apache/hive/service/cli/operation/TestOperationLoggingAPI.java =================================================================== --- service/src/test/org/apache/hive/service/cli/operation/TestOperationLoggingAPI.java (revision 1673556) +++ service/src/test/org/apache/hive/service/cli/operation/TestOperationLoggingAPI.java (working copy) @@ -1,268 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hive.service.cli.operation; - -import java.io.File; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.FetchType; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationHandle; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationStatus; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.SessionHandle; -import org.apache.hive.service.cli.thrift.EmbeddedThriftBinaryCLIService; -import org.apache.hive.service.cli.thrift.ThriftCLIServiceClient; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -/** - * TestOperationLoggingAPI - * Test the FetchResults of TFetchType.LOG in thrift level. 
- */ -public class TestOperationLoggingAPI { - private static HiveConf hiveConf; - private final String tableName = "testOperationLoggingAPI_table"; - private File dataFile; - private ThriftCLIServiceClient client; - private SessionHandle sessionHandle; - private final String sql = "select * from " + tableName; - private final String[] expectedLogs = { - "Parsing command", - "Parse Completed", - "Starting Semantic Analysis", - "Semantic Analysis Completed", - "Starting command" - }; - - @BeforeClass - public static void setUpBeforeClass() { - hiveConf = new HiveConf(); - hiveConf.setBoolean(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_VERBOSE.varname, true); - } - - /** - * Start embedded mode, open a session, and create a table for cases usage - * @throws Exception - */ - @Before - public void setUp() throws Exception { - dataFile = new File(hiveConf.get("test.data.files"), "kv1.txt"); - EmbeddedThriftBinaryCLIService service = new EmbeddedThriftBinaryCLIService(); - service.init(hiveConf); - client = new ThriftCLIServiceClient(service); - sessionHandle = setupSession(); - } - - @After - public void tearDown() throws Exception { - // Cleanup - String queryString = "DROP TABLE " + tableName; - client.executeStatement(sessionHandle, queryString, null); - - client.closeSession(sessionHandle); - } - - @Test - public void testFetchResultsOfLog() throws Exception { - // verify whether the sql operation log is generated and fetch correctly. - OperationHandle operationHandle = client.executeStatement(sessionHandle, sql, null); - RowSet rowSetLog = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, - FetchType.LOG); - verifyFetchedLog(rowSetLog); - } - - @Test - public void testFetchResultsOfLogAsync() throws Exception { - // verify whether the sql operation log is generated and fetch correctly in async mode. - OperationHandle operationHandle = client.executeStatementAsync(sessionHandle, sql, null); - - // Poll on the operation status till the query is completed - boolean isQueryRunning = true; - long pollTimeout = System.currentTimeMillis() + 100000; - OperationStatus opStatus; - OperationState state = null; - RowSet rowSetAccumulated = null; - StringBuilder logs = new StringBuilder(); - - while (isQueryRunning) { - // Break if polling times out - if (System.currentTimeMillis() > pollTimeout) { - break; - } - opStatus = client.getOperationStatus(operationHandle); - Assert.assertNotNull(opStatus); - state = opStatus.getState(); - - rowSetAccumulated = client.fetchResults(operationHandle, FetchOrientation.FETCH_NEXT, 1000, - FetchType.LOG); - for (Object[] row : rowSetAccumulated) { - logs.append(row[0]); - } - - if (state == OperationState.CANCELED || - state == OperationState.CLOSED || - state == OperationState.FINISHED || - state == OperationState.ERROR) { - isQueryRunning = false; - } - Thread.sleep(10); - } - // The sql should be completed now. 
- Assert.assertEquals("Query should be finished", OperationState.FINISHED, state); - - // Verify the accumulated logs - verifyFetchedLog(logs.toString()); - - // Verify the fetched logs from the beginning of the log file - RowSet rowSet = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, - FetchType.LOG); - verifyFetchedLog(rowSet); - } - - @Test - public void testFetchResultsOfLogWithOrientation() throws Exception { - // (FETCH_FIRST) execute a sql, and fetch its sql operation log as expected value - OperationHandle operationHandle = client.executeStatement(sessionHandle, sql, null); - RowSet rowSetLog = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, - FetchType.LOG); - int expectedLogLength = rowSetLog.numRows(); - - // (FETCH_NEXT) execute the same sql again, - // and fetch the sql operation log with FETCH_NEXT orientation - OperationHandle operationHandleWithOrientation = client.executeStatement(sessionHandle, sql, - null); - RowSet rowSetLogWithOrientation; - int logLength = 0; - int maxRows = calculateProperMaxRows(expectedLogLength); - do { - rowSetLogWithOrientation = client.fetchResults(operationHandleWithOrientation, - FetchOrientation.FETCH_NEXT, maxRows, FetchType.LOG); - logLength += rowSetLogWithOrientation.numRows(); - } while (rowSetLogWithOrientation.numRows() == maxRows); - Assert.assertEquals(expectedLogLength, logLength); - - // (FETCH_FIRST) fetch again from the same operation handle with FETCH_FIRST orientation - rowSetLogWithOrientation = client.fetchResults(operationHandleWithOrientation, - FetchOrientation.FETCH_FIRST, 1000, FetchType.LOG); - verifyFetchedLog(rowSetLogWithOrientation); - } - - @Test - public void testFetchResultsOfLogCleanup() throws Exception { - // Verify cleanup functionality. - // Open a new session, since this case needs to close the session in the end. - SessionHandle sessionHandleCleanup = setupSession(); - - // prepare - OperationHandle operationHandle = client.executeStatement(sessionHandleCleanup, sql, null); - RowSet rowSetLog = client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, - FetchType.LOG); - verifyFetchedLog(rowSetLog); - - File sessionLogDir = new File( - hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION) + - File.separator + sessionHandleCleanup.getHandleIdentifier()); - File operationLogFile = new File(sessionLogDir, operationHandle.getHandleIdentifier().toString()); - - // check whether exception is thrown when fetching log from a closed operation. - client.closeOperation(operationHandle); - try { - client.fetchResults(operationHandle, FetchOrientation.FETCH_FIRST, 1000, FetchType.LOG); - Assert.fail("Fetch should fail"); - } catch (HiveSQLException e) { - Assert.assertTrue(e.getMessage().contains("Invalid OperationHandle:")); - } - - // check whether operation log file is deleted. - if (operationLogFile.exists()) { - Assert.fail("Operation log file should be deleted."); - } - - // check whether session log dir is deleted after session is closed. 
- client.closeSession(sessionHandleCleanup); - if (sessionLogDir.exists()) { - Assert.fail("Session log dir should be deleted."); - } - } - - private SessionHandle setupSession() throws Exception { - // Open a session - SessionHandle sessionHandle = client.openSession(null, null, null); - - // Change lock manager to embedded mode - String queryString = "SET hive.lock.manager=" + - "org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager"; - client.executeStatement(sessionHandle, queryString, null); - - // Drop the table if it exists - queryString = "DROP TABLE IF EXISTS " + tableName; - client.executeStatement(sessionHandle, queryString, null); - - // Create a test table - queryString = "create table " + tableName + " (key int, value string)"; - client.executeStatement(sessionHandle, queryString, null); - - // Load data - queryString = "load data local inpath '" + dataFile + "' into table " + tableName; - client.executeStatement(sessionHandle, queryString, null); - - // Precondition check: verify whether the table is created and data is fetched correctly. - OperationHandle operationHandle = client.executeStatement(sessionHandle, sql, null); - RowSet rowSetResult = client.fetchResults(operationHandle); - Assert.assertEquals(500, rowSetResult.numRows()); - Assert.assertEquals(238, rowSetResult.iterator().next()[0]); - Assert.assertEquals("val_238", rowSetResult.iterator().next()[1]); - - return sessionHandle; - } - - // Since the log length of the sql operation may vary during HIVE dev, calculate a proper maxRows. - private int calculateProperMaxRows(int len) { - if (len < 10) { - return 1; - } else if (len < 100) { - return 10; - } else { - return 100; - } - } - - private void verifyFetchedLog(RowSet rowSet) { - StringBuilder stringBuilder = new StringBuilder(); - - for (Object[] row : rowSet) { - stringBuilder.append(row[0]); - } - - String logs = stringBuilder.toString(); - verifyFetchedLog(logs); - } - - private void verifyFetchedLog(String logs) { - for (String log : expectedLogs) { - Assert.assertTrue("Checking for presence of " + log, logs.contains(log)); - } - } -} Index: shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java =================================================================== --- shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java (revision 1673556) +++ shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java (working copy) @@ -29,6 +29,9 @@ * */ public abstract class ShimLoader { + public static String HADOOP20SVERSIONNAME = "0.20S"; + public static String HADOOP23VERSIONNAME = "0.23"; + private static HadoopShims hadoopShims; private static JettyShims jettyShims; private static AppenderSkeleton eventCounter; @@ -42,8 +45,8 @@ new HashMap<String, String>(); static { - HADOOP_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.hive.shims.Hadoop20SShims"); - HADOOP_SHIM_CLASSES.put("0.23", "org.apache.hadoop.hive.shims.Hadoop23Shims"); + HADOOP_SHIM_CLASSES.put(HADOOP20SVERSIONNAME, "org.apache.hadoop.hive.shims.Hadoop20SShims"); + HADOOP_SHIM_CLASSES.put(HADOOP23VERSIONNAME, "org.apache.hadoop.hive.shims.Hadoop23Shims"); } /** @@ -54,8 +57,8 @@ new HashMap<String, String>(); static { - JETTY_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.hive.shims.Jetty20SShims"); - JETTY_SHIM_CLASSES.put("0.23", "org.apache.hadoop.hive.shims.Jetty23Shims"); + JETTY_SHIM_CLASSES.put(HADOOP20SVERSIONNAME, "org.apache.hadoop.hive.shims.Jetty20SShims"); + JETTY_SHIM_CLASSES.put(HADOOP23VERSIONNAME, "org.apache.hadoop.hive.shims.Jetty23Shims"); } /** @@ -65,8 +68,10 @@ new
HashMap<String, String>(); static { - EVENT_COUNTER_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.log.metrics.EventCounter"); - EVENT_COUNTER_SHIM_CLASSES.put("0.23", "org.apache.hadoop.log.metrics.EventCounter"); + EVENT_COUNTER_SHIM_CLASSES.put(HADOOP20SVERSIONNAME, "org.apache.hadoop.log.metrics" + + ".EventCounter"); + EVENT_COUNTER_SHIM_CLASSES.put(HADOOP23VERSIONNAME, "org.apache.hadoop.log.metrics" + + ".EventCounter"); } /** @@ -76,9 +81,9 @@ new HashMap<String, String>(); static { - HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put("0.20S", + HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put(HADOOP20SVERSIONNAME, "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge"); - HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put("0.23", + HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put(HADOOP23VERSIONNAME, "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge23"); } @@ -162,9 +167,9 @@ switch (Integer.parseInt(parts[0])) { case 1: - return "0.20S"; + return HADOOP20SVERSIONNAME; case 2: - return "0.23"; + return HADOOP23VERSIONNAME; default: throw new IllegalArgumentException("Unrecognized Hadoop major version number: " + vers); }
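For context on the ShimLoader change above, the following standalone sketch (not part of this patch; the ShimVersionDemo class, its method name, and the sample inputs are invented for illustration) mirrors the major-version-to-shim-name mapping that the new HADOOP20SVERSIONNAME and HADOOP23VERSIONNAME constants name, assuming version strings of the form returned by Hadoop's VersionInfo.getVersion():

public class ShimVersionDemo {
  static final String HADOOP20SVERSIONNAME = "0.20S";
  static final String HADOOP23VERSIONNAME = "0.23";

  // Mirrors the version switch in the last ShimLoader hunk above.
  static String shimVersionFor(String vers) {
    String[] parts = vers.split("\\.");
    switch (Integer.parseInt(parts[0])) {
    case 1:
      return HADOOP20SVERSIONNAME; // Hadoop 1.x is served by the 0.20S (secure) shims
    case 2:
      return HADOOP23VERSIONNAME;  // Hadoop 2.x is served by the 0.23 shims
    default:
      throw new IllegalArgumentException("Unrecognized Hadoop major version number: " + vers);
    }
  }

  public static void main(String[] args) {
    System.out.println(shimVersionFor("1.2.1")); // 0.20S
    System.out.println(shimVersionFor("2.6.0")); // 0.23
  }
}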